6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | // SPDX-License-Identifier: GPL-2.0-only /* * ebt_dnat * * Authors: * Bart De Schuymer <bdschuym@pandora.be> * * June, 2002 * */ #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; if (skb_ensure_writable(skb, 0)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); if (is_multicast_ether_addr(info->mac)) { if (is_broadcast_ether_addr(info->mac)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; } else { const struct net_device *dev; switch (xt_hooknum(par)) { case NF_BR_BROUTING: dev = xt_in(par); break; case NF_BR_PRE_ROUTING: dev = br_port_get_rcu(xt_in(par))->br->dev; break; default: dev = NULL; break; } if (!dev) /* NF_BR_LOCAL_OUT */ return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; else skb->pkt_type = PACKET_OTHERHOST; } return info->target; } static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) { const struct ebt_nat_info *info = par->targinfo; unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) return -EINVAL; hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); if ((strcmp(par->table, "nat") != 0 || (hook_mask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) && (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; if (ebt_invalid_target(info->target)) return -EINVAL; 
return 0; } static struct xt_target ebt_dnat_tg_reg __read_mostly = { .name = "dnat", .revision = 0, .family = NFPROTO_BRIDGE, .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), .target = ebt_dnat_tg, .checkentry = ebt_dnat_tg_check, .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; static int __init ebt_dnat_init(void) { return xt_register_target(&ebt_dnat_tg_reg); } static void __exit ebt_dnat_fini(void) { xt_unregister_target(&ebt_dnat_tg_reg); } module_init(ebt_dnat_init); module_exit(ebt_dnat_fini); MODULE_DESCRIPTION("Ebtables: Destination MAC address translation"); MODULE_LICENSE("GPL"); |
32 413 400 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/truncate.h * * Common inline functions needed for truncate support */ /* * Truncate blocks that were not used by write. We have to truncate the * pagecache as well so that corresponding buffers get properly unmapped. */ static inline void ext4_truncate_failed_write(struct inode *inode) { struct address_space *mapping = inode->i_mapping; /* * We don't need to call ext4_break_layouts() because the blocks we * are truncating were never visible to userspace. */ filemap_invalidate_lock(mapping); truncate_inode_pages(mapping, inode->i_size); ext4_truncate(inode); filemap_invalidate_unlock(mapping); } /* * Work out how many blocks we need to proceed with the next chunk of a * truncate transaction. */ static inline unsigned long ext4_blocks_for_truncate(struct inode *inode) { ext4_lblk_t needed; needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); /* Give ourselves just enough room to cope with inodes in which * i_blocks is corrupt: we've seen disk corruptions in the past * which resulted in random data in an inode which looked enough * like a regular file for ext4 to try to delete it. Things * will go a bit crazy if that happens, but at least we should * try not to panic the whole kernel. */ if (needed < 2) needed = 2; /* But we need to bound the transaction so we don't overflow the * journal. */ if (needed > EXT4_MAX_TRANS_DATA) needed = EXT4_MAX_TRANS_DATA; return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; } |
35 35 35 35 35 11 11 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. * * This file contains power management functions related to interrupts. */ #include <linux/irq.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> #include "internals.h" bool irq_pm_check_wakeup(struct irq_desc *desc) { if (irqd_is_wakeup_armed(&desc->irq_data)) { irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; desc->depth++; irq_disable(desc); pm_system_irq_wakeup(irq_desc_get_irq(desc)); return true; } return false; } /* * Called from __setup_irq() with desc->lock held after @action has * been installed in the action chain. 
*/ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { desc->nr_actions++; if (action->flags & IRQF_FORCE_RESUME) desc->force_resume_depth++; WARN_ON_ONCE(desc->force_resume_depth && desc->force_resume_depth != desc->nr_actions); if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth++; else if (action->flags & IRQF_COND_SUSPEND) desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && (desc->no_suspend_depth + desc->cond_suspend_depth) != desc->nr_actions); } /* * Called from __free_irq() with desc->lock held after @action has * been removed from the action chain. */ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { desc->nr_actions--; if (action->flags & IRQF_FORCE_RESUME) desc->force_resume_depth--; if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth--; else if (action->flags & IRQF_COND_SUSPEND) desc->cond_suspend_depth--; } static bool suspend_device_irq(struct irq_desc *desc) { unsigned long chipflags = irq_desc_get_chip(desc)->flags; struct irq_data *irqd = &desc->irq_data; if (!desc->action || irq_desc_is_chained(desc) || desc->no_suspend_depth) return false; if (irqd_is_wakeup_set(irqd)) { irqd_set(irqd, IRQD_WAKEUP_ARMED); if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) && irqd_irq_disabled(irqd)) { /* * Interrupt marked for wakeup is in disabled state. * Enable interrupt here to unmask/enable in irqchip * to be able to resume with such interrupts. */ __enable_irq(desc); irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); } /* * We return true here to force the caller to issue * synchronize_irq(). We need to make sure that the * IRQD_WAKEUP_ARMED is visible before we return from * suspend_device_irqs(). */ return true; } desc->istate |= IRQS_SUSPENDED; __disable_irq(desc); /* * Hardware which has no wakeup source configuration facility * requires that the non wakeup interrupts are masked at the * chip level. The chip implementation indicates that with * IRQCHIP_MASK_ON_SUSPEND. 
*/ if (chipflags & IRQCHIP_MASK_ON_SUSPEND) mask_irq(desc); return true; } /** * suspend_device_irqs - disable all currently enabled interrupt lines * * During system-wide suspend or hibernation device drivers need to be * prevented from receiving interrupts and this function is provided * for this purpose. * * So we disable all interrupts and mark them IRQS_SUSPENDED except * for those which are unused, those which are marked as not * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND * set and those which are marked as active wakeup sources. * * The active wakeup sources are handled by the flow handler entry * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the * interrupt and notifies the pm core about the wakeup. */ void suspend_device_irqs(void) { struct irq_desc *desc; int irq; for_each_irq_desc(irq, desc) { unsigned long flags; bool sync; if (irq_settings_is_nested_thread(desc)) continue; raw_spin_lock_irqsave(&desc->lock, flags); sync = suspend_device_irq(desc); raw_spin_unlock_irqrestore(&desc->lock, flags); if (sync) synchronize_irq(irq); } } static void resume_irq(struct irq_desc *desc) { struct irq_data *irqd = &desc->irq_data; irqd_clear(irqd, IRQD_WAKEUP_ARMED); if (irqd_is_enabled_on_suspend(irqd)) { /* * Interrupt marked for wakeup was enabled during suspend * entry. Disable such interrupts to restore them back to * original state. */ __disable_irq(desc); irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); } if (desc->istate & IRQS_SUSPENDED) goto resume; /* Force resume the interrupt? */ if (!desc->force_resume_depth) return; /* Pretend that it got disabled ! 
*/ desc->depth++; irq_state_set_disabled(desc); irq_state_set_masked(desc); resume: desc->istate &= ~IRQS_SUSPENDED; __enable_irq(desc); } static void resume_irqs(bool want_early) { struct irq_desc *desc; int irq; for_each_irq_desc(irq, desc) { unsigned long flags; bool is_early = desc->action && desc->action->flags & IRQF_EARLY_RESUME; if (!is_early && want_early) continue; if (irq_settings_is_nested_thread(desc)) continue; raw_spin_lock_irqsave(&desc->lock, flags); resume_irq(desc); raw_spin_unlock_irqrestore(&desc->lock, flags); } } /** * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup * @irq: Interrupt to rearm */ void rearm_wake_irq(unsigned int irq) { unsigned long flags; struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); if (!desc) return; if (!(desc->istate & IRQS_SUSPENDED) || !irqd_is_wakeup_set(&desc->irq_data)) goto unlock; desc->istate &= ~IRQS_SUSPENDED; irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); __enable_irq(desc); unlock: irq_put_desc_busunlock(desc, flags); } /** * irq_pm_syscore_resume - enable interrupt lines early * * Enable all interrupt lines with %IRQF_EARLY_RESUME set. */ static void irq_pm_syscore_resume(void) { resume_irqs(true); } static struct syscore_ops irq_pm_syscore_ops = { .resume = irq_pm_syscore_resume, }; static int __init irq_pm_init_ops(void) { register_syscore_ops(&irq_pm_syscore_ops); return 0; } device_initcall(irq_pm_init_ops); /** * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() * * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag * set as well as those with %IRQF_FORCE_RESUME. */ void resume_device_irqs(void) { resume_irqs(false); } |
4 4 1449 2 7 4 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * SHA1 Secure Hash Algorithm. * * Derived from cryptoapi implementation, adapted for in-place * scatterlist interface. * * Copyright (c) Alan Smithee. * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> */ #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/byteorder.h> const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE] = { 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8, 0x07, 0x09 }; EXPORT_SYMBOL_GPL(sha1_zero_message_hash); static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src, int blocks) { u32 temp[SHA1_WORKSPACE_WORDS]; while (blocks--) { sha1_transform(sst->state, src, temp); src += SHA1_BLOCK_SIZE; } memzero_explicit(temp, sizeof(temp)); } int crypto_sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_base_do_update(desc, data, len, sha1_generic_block_fn); } EXPORT_SYMBOL(crypto_sha1_update); static int sha1_final(struct shash_desc *desc, u8 *out) { sha1_base_do_finalize(desc, sha1_generic_block_fn); return sha1_base_finish(desc, out); } int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { sha1_base_do_update(desc, data, len, sha1_generic_block_fn); return sha1_final(desc, out); } EXPORT_SYMBOL(crypto_sha1_finup); static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = crypto_sha1_update, .final = 
sha1_final, .finup = crypto_sha1_finup, .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-generic", .cra_priority = 100, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int __init sha1_generic_mod_init(void) { return crypto_register_shash(&alg); } static void __exit sha1_generic_mod_fini(void) { crypto_unregister_shash(&alg); } subsys_initcall(sha1_generic_mod_init); module_exit(sha1_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); MODULE_ALIAS_CRYPTO("sha1"); MODULE_ALIAS_CRYPTO("sha1-generic"); |
8 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #ifndef CURVE25519_H #define CURVE25519_H #include <crypto/algapi.h> // For crypto_memneq. #include <linux/types.h> #include <linux/random.h> enum curve25519_lengths { CURVE25519_KEY_SIZE = 32 }; extern const u8 curve25519_null_point[]; extern const u8 curve25519_base_point[]; void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], const u8 scalar[CURVE25519_KEY_SIZE], const u8 point[CURVE25519_KEY_SIZE]); void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], const u8 scalar[CURVE25519_KEY_SIZE], const u8 point[CURVE25519_KEY_SIZE]); void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]); bool curve25519_selftest(void); static inline bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) curve25519_arch(mypublic, secret, basepoint); else curve25519_generic(mypublic, secret, basepoint); return crypto_memneq(mypublic, curve25519_null_point, CURVE25519_KEY_SIZE); } static inline bool __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]) { if (unlikely(!crypto_memneq(secret, curve25519_null_point, CURVE25519_KEY_SIZE))) return false; if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) curve25519_base_arch(pub, secret); else curve25519_generic(pub, secret, curve25519_base_point); return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); } static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE]) { secret[0] &= 248; secret[31] = (secret[31] & 127) | 64; } static 
inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]) { get_random_bytes_wait(secret, CURVE25519_KEY_SIZE); curve25519_clamp_secret(secret); } #endif /* CURVE25519_H */ |
1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 | /* * linux/fs/nls/mac-celtic.c * * Charset macceltic translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ /* * COPYRIGHT AND PERMISSION NOTICE * * Copyright 1991-2012 Unicode, Inc. All rights reserved. Distributed under * the Terms of Use in http://www.unicode.org/copyright.html. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of the Unicode data files and any associated documentation (the "Data * Files") or Unicode software and any associated documentation (the * "Software") to deal in the Data Files or Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, and/or sell copies of the Data Files or Software, and * to permit persons to whom the Data Files or Software are furnished to do * so, provided that (a) the above copyright notice(s) and this permission * notice appear with all copies of the Data Files or Software, (b) both the * above copyright notice(s) and this permission notice appear in associated * documentation, and (c) there is clear notice in each modified Data File or * in the Software as well as in the documentation associated with the Data * File(s) or Software that the data or software has been modified. * * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF * THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THE DATA FILES OR SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or other * dealings in these Data Files or Software without prior written * authorization of the copyright holder. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80 */ 0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 
0x00dc, 0x00e1, 0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8, /* 0x90 */ 0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3, 0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc, /* 0xa0 */ 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8, /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211, 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8, /* 0xc0 */ 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153, /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, 0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0x0176, 0x0177, /* 0xe0 */ 0x2021, 0x00b7, 0x1ef2, 0x1ef3, 0x2030, 0x00c2, 0x00ca, 0x00c1, 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4, /* 0xf0 */ 0x2663, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x00dd, 0x00fd, 0x0174, 0x0175, 0x1e84, 0x1e85, 0x1e80, 0x1e81, 0x1e82, 0x1e83, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xca, 0xc1, 0xa2, 0xa3, 0x00, 0xb4, 0x00, 0xa4, /* 0xa0-0xa7 */ 0xac, 0xa9, 0xbb, 0xc7, 0xc2, 0x00, 0xa8, 0x00, /* 0xa8-0xaf */ 0xa1, 0xb1, 0x00, 0x00, 0xab, 0xb5, 0xa6, 0xe1, /* 0xb0-0xb7 */ 0x00, 0x00, 0xbc, 0xc8, 0x00, 0x00, 0x00, 0xc0, /* 0xb8-0xbf */ 0xcb, 0xe7, 0xe5, 0xcc, 0x80, 0x81, 0xae, 0x82, /* 0xc0-0xc7 */ 0xe9, 0x83, 0xe6, 0xe8, 0xed, 0xea, 0xeb, 0xec, /* 0xc8-0xcf */ 0x00, 0x84, 0xf1, 0xee, 0xef, 0xcd, 0x85, 0x00, /* 0xd0-0xd7 */ 0xaf, 0xf4, 0xf2, 0xf3, 0x86, 0xf6, 0x00, 0xa7, /* 0xd8-0xdf */ 0x88, 0x87, 0x89, 0x8b, 0x8a, 0x8c, 0xbe, 0x8d, /* 0xe0-0xe7 */ 0x8f, 0x8e, 0x90, 0x91, 0x93, 0x92, 0x94, 0x95, /* 0xe8-0xef */ 0x00, 0x96, 0x98, 0x97, 0x99, 0x9b, 0x9a, 0xd6, /* 0xf0-0xf7 */ 0xbf, 0x9d, 0x9c, 0x9e, 0x9f, 0xf7, 0x00, 0xd8, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0xce, 0xcf, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0xf8, 0xf9, 0xde, 0xdf, /* 0x70-0x77 */ 0xd9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0xbd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0xb9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page1e[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xfc, 0xfd, 0xfe, 0xff, 0xfa, 0xfb, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0xe2, 0xe3, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0xd0, 0xd1, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd4, 0xd5, 0x00, 0x00, 0xd2, 0xd3, 0x00, 0x00, /* 0x18-0x1f */ 0xa0, 0xe0, 0xa5, 0x00, 0x00, 0x00, 0xc9, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0xdc, 0xdd, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 
0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0xc6, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, /* 0x08-0x0f */ 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, 0xb0, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 
0x00, 0x00, 0x00, 0xba, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xc5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xad, 0x00, 0x00, 0x00, 0xb2, 0xb3, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page25[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0xd7, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page26[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, 
NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page1e, NULL, page20, page21, page22, NULL, NULL, page25, page26, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x00-0x07 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x08-0x0f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x10-0x17 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x18-0x1f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 
0x20-0x27 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x28-0x2f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x30-0x37 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x38-0x3f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x40-0x47 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x48-0x4f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x50-0x57 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x58-0x5f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x60-0x67 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x68-0x6f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x70-0x77 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x78-0x7f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x80-0x87 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x88-0x8f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x90-0x97 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x98-0x9f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xa0-0xa7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xa8-0xaf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xb0-0xb7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xb8-0xbf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xc0-0xc7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xc8-0xcf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xd0-0xd7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xd8-0xdf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xe0-0xe7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xe8-0xef */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0-0xf7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if 
(uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "macceltic", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_macceltic(void) { return register_nls(&table); } static void __exit exit_nls_macceltic(void) { unregister_nls(&table); } module_init(init_nls_macceltic) module_exit(exit_nls_macceltic) MODULE_LICENSE("Dual BSD/GPL"); |
1961 3357 23 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM filemap

#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FILEMAP_H

#include <linux/types.h>
#include <linux/tracepoint.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/device.h>
#include <linux/kdev_t.h>
#include <linux/errseq.h>

/*
 * Event class shared by the page-cache add/delete events below.
 *
 * For the given folio it records the pfn, the owning inode number, the
 * folio index, a device number and the folio order.  The device number is
 * taken from the inode's superblock when i_sb is set and from the inode's
 * i_rdev otherwise.  The printed "ofs" is the index shifted left by
 * PAGE_SHIFT.
 */
DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,

	TP_PROTO(struct folio *folio),

	TP_ARGS(folio),

	TP_STRUCT__entry(
		__field(unsigned long, pfn)
		__field(unsigned long, i_ino)
		__field(unsigned long, index)
		__field(dev_t, s_dev)
		__field(unsigned char, order)
	),

	TP_fast_assign(
		__entry->pfn = folio_pfn(folio);
		__entry->i_ino = folio->mapping->host->i_ino;
		__entry->index = folio->index;
		if (folio->mapping->host->i_sb)
			__entry->s_dev = folio->mapping->host->i_sb->s_dev;
		else
			__entry->s_dev = folio->mapping->host->i_rdev;
		__entry->order = folio_order(folio);
	),

	TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u",
		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
		__entry->i_ino,
		__entry->pfn,
		__entry->index << PAGE_SHIFT,
		__entry->order)
);

/* Instance of the class above, named for removal from the page cache. */
DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache,
	TP_PROTO(struct folio *folio),
	TP_ARGS(folio)
	);

/* Instance of the class above, named for insertion into the page cache. */
DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache,
	TP_PROTO(struct folio *folio),
	TP_ARGS(folio)
	);

/*
 * Records the inode number, device number and the errseq_t value passed in
 * @eseq for @mapping.  The device number is chosen the same way as in the
 * page-cache class: superblock s_dev when i_sb is set, i_rdev otherwise.
 */
TRACE_EVENT(filemap_set_wb_err,
		TP_PROTO(struct address_space *mapping, errseq_t eseq),

		TP_ARGS(mapping, eseq),

		TP_STRUCT__entry(
			__field(unsigned long, i_ino)
			__field(dev_t, s_dev)
			__field(errseq_t, errseq)
		),

		TP_fast_assign(
			__entry->i_ino = mapping->host->i_ino;
			__entry->errseq = eseq;
			if (mapping->host->i_sb)
				__entry->s_dev = mapping->host->i_sb->s_dev;
			else
				__entry->s_dev = mapping->host->i_rdev;
		),

		TP_printk("dev=%d:%d ino=0x%lx errseq=0x%x",
			MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
			__entry->i_ino, __entry->errseq)
);

/*
 * Records the file pointer, the inode and device of file->f_mapping->host,
 * the caller-supplied @old value, and the file's current f_wb_err as "new".
 */
TRACE_EVENT(file_check_and_advance_wb_err,
		TP_PROTO(struct file *file, errseq_t old),

		TP_ARGS(file, old),

		TP_STRUCT__entry(
			__field(struct file *, file)
			__field(unsigned long, i_ino)
			__field(dev_t, s_dev)
			__field(errseq_t, old)
			__field(errseq_t, new)
		),

		TP_fast_assign(
			__entry->file = file;
			__entry->i_ino = file->f_mapping->host->i_ino;
			if (file->f_mapping->host->i_sb)
				__entry->s_dev =
					file->f_mapping->host->i_sb->s_dev;
			else
				__entry->s_dev =
					file->f_mapping->host->i_rdev;
			__entry->old = old;
			__entry->new = file->f_wb_err;
		),

		TP_printk("file=%p dev=%d:%d ino=0x%lx old=0x%x new=0x%x",
			__entry->file, MAJOR(__entry->s_dev),
			MINOR(__entry->s_dev), __entry->i_ino, __entry->old,
			__entry->new)
);
#endif /* _TRACE_FILEMAP_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
|
38 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* V4L2 device support header. Copyright (C) 2008 Hans Verkuil <hverkuil@xs4all.nl> */ #ifndef _V4L2_DEVICE_H #define _V4L2_DEVICE_H #include <media/media-device.h> #include <media/v4l2-subdev.h> #include <media/v4l2-dev.h> struct v4l2_ctrl_handler; /** * struct v4l2_device - main struct for V4L2 device drivers * * @dev: pointer to struct device. * @mdev: pointer to struct media_device, may be NULL. * @subdevs: used to keep track of the registered subdevs * @lock: lock this struct; can be used by the driver as well * if this struct is embedded into a larger struct. * @name: unique device name, by default the driver name + bus ID * @notify: notify operation called by some sub-devices. * @ctrl_handler: The control handler. May be %NULL. * @prio: Device's priority state * @ref: Keep track of the references to this struct. * @release: Release function that is called when the ref count * goes to 0. * * Each instance of a V4L2 device should create the v4l2_device struct, * either stand-alone or embedded in a larger struct. * * It allows easy access to sub-devices (see v4l2-subdev.h) and provides * basic V4L2 device-level support. * * .. note:: * * #) @dev->driver_data points to this struct. 
* #) @dev might be %NULL if there is no parent device
 */
struct v4l2_device {
	struct device *dev;		/* parent device */
	struct media_device *mdev;	/* tested for NULL by users of this struct */
	struct list_head subdevs;	/* list head walked by the subdev iterator macros */
	spinlock_t lock;
	char name[36];			/* fixed-size name buffer */
	/* optional callback; v4l2_subdev_notify() only calls it when non-NULL */
	void (*notify)(struct v4l2_subdev *sd,
			unsigned int notification, void *arg);
	struct v4l2_ctrl_handler *ctrl_handler;
	struct v4l2_prio_state prio;
	struct kref ref;		/* reference count, see v4l2_device_get() */
	void (*release)(struct v4l2_device *v4l2_dev);
};

/**
 * v4l2_device_get - gets a V4L2 device reference
 *
 * @v4l2_dev: pointer to struct &v4l2_device
 *
 * This is an ancillary routine that increments the usage count of the
 * struct &v4l2_device pointed to by @v4l2_dev, by calling kref_get() on
 * its @ref member.
 */
static inline void v4l2_device_get(struct v4l2_device *v4l2_dev)
{
	kref_get(&v4l2_dev->ref);
}

/**
 * v4l2_device_put - puts a V4L2 device reference
 *
 * @v4l2_dev: pointer to struct &v4l2_device
 *
 * This is an ancillary routine that decrements the usage count of the
 * struct &v4l2_device pointed to by @v4l2_dev.
 */
int v4l2_device_put(struct v4l2_device *v4l2_dev);

/**
 * v4l2_device_register - Initialize v4l2_dev and make @dev->driver_data
 *	point to @v4l2_dev.
 *
 * @dev: pointer to struct &device
 * @v4l2_dev: pointer to struct &v4l2_device
 *
 * .. note::
 *	@dev may be %NULL in rare cases (ISA devices).
 *	In that case the caller must fill in the @v4l2_dev->name field
 *	before calling this function.
 */
int __must_check v4l2_device_register(struct device *dev,
				      struct v4l2_device *v4l2_dev);

/**
 * v4l2_device_set_name - Optional function to initialize the
 *	name field of struct &v4l2_device
 *
 * @v4l2_dev: pointer to struct &v4l2_device
 * @basename: base name for the device name
 * @instance: pointer to a static atomic_t var with the instance usage for
 *	the device driver.
 *
 * v4l2_device_set_name() initializes the name field of struct &v4l2_device
 * using the driver name and a driver-global atomic_t instance.
 *
 * This function will increment the instance counter and returns the
 * instance value used in the name.
*
 * Example:
 *
 *   static atomic_t drv_instance = ATOMIC_INIT(0);
 *
 *   ...
 *
 *   instance = v4l2_device_set_name(&\ v4l2_dev, "foo", &\ drv_instance);
 *
 * The first time this is called the name field will be set to foo0 and
 * this function returns 0. If the name ends with a digit (e.g. cx18),
 * then the name will be set to cx18-0 since cx180 would look really odd.
 */
int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename,
			 atomic_t *instance);

/**
 * v4l2_device_disconnect - Change V4L2 device state to disconnected.
 *
 * @v4l2_dev: pointer to struct v4l2_device
 *
 * Should be called when the USB parent disconnects.
 * Since the parent disappears, this ensures that @v4l2_dev doesn't have
 * an invalid parent pointer.
 *
 * .. note:: This function sets @v4l2_dev->dev to NULL.
 */
void v4l2_device_disconnect(struct v4l2_device *v4l2_dev);

/**
 * v4l2_device_unregister - Unregister all sub-devices and any other
 *	resources related to @v4l2_dev.
 *
 * @v4l2_dev: pointer to struct v4l2_device
 */
void v4l2_device_unregister(struct v4l2_device *v4l2_dev);

/**
 * v4l2_device_register_subdev - Registers a subdev with a v4l2 device.
 *
 * @v4l2_dev: pointer to struct &v4l2_device
 * @sd: pointer to &struct v4l2_subdev
 *
 * While registered, the subdev module is marked as in-use.
 *
 * Any attempt to register a subdev whose module is no longer loaded
 * returns an error.
 */
int __must_check v4l2_device_register_subdev(struct v4l2_device *v4l2_dev,
					     struct v4l2_subdev *sd);

/**
 * v4l2_device_unregister_subdev - Unregisters a subdev with a v4l2 device.
 *
 * @sd: pointer to &struct v4l2_subdev
 *
 * .. note::
 *
 *	Can also be called if the subdev wasn't registered. In that
 *	case, it will do nothing.
 */
void v4l2_device_unregister_subdev(struct v4l2_subdev *sd);

/**
 * __v4l2_device_register_subdev_nodes - Registers device nodes for
 *	all subdevs of the v4l2 device that are marked with the
 *	%V4L2_SUBDEV_FL_HAS_DEVNODE flag.
* * @v4l2_dev: pointer to struct v4l2_device * @read_only: subdevices read-only flag. True to register the subdevices * device nodes in read-only mode, false to allow full access to the * subdevice userspace API. */ int __must_check __v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev, bool read_only); /** * v4l2_device_register_subdev_nodes - Registers subdevices device nodes with * unrestricted access to the subdevice userspace operations * * Internally calls __v4l2_device_register_subdev_nodes(). See its documentation * for more details. * * @v4l2_dev: pointer to struct v4l2_device */ static inline int __must_check v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev) { #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API) return __v4l2_device_register_subdev_nodes(v4l2_dev, false); #else return 0; #endif } /** * v4l2_device_register_ro_subdev_nodes - Registers subdevices device nodes * in read-only mode * * Internally calls __v4l2_device_register_subdev_nodes(). See its documentation * for more details. * * @v4l2_dev: pointer to struct v4l2_device */ static inline int __must_check v4l2_device_register_ro_subdev_nodes(struct v4l2_device *v4l2_dev) { #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API) return __v4l2_device_register_subdev_nodes(v4l2_dev, true); #else return 0; #endif } /** * v4l2_subdev_notify - Sends a notification to v4l2_device. * * @sd: pointer to &struct v4l2_subdev * @notification: type of notification. Please notice that the notification * type is driver-specific. * @arg: arguments for the notification. Those are specific to each * notification type. */ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, unsigned int notification, void *arg) { if (sd && sd->v4l2_dev && sd->v4l2_dev->notify) sd->v4l2_dev->notify(sd, notification, arg); } /** * v4l2_device_supports_requests - Test if requests are supported. 
*
 * @v4l2_dev: pointer to struct v4l2_device
 */
static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev)
{
	/* True only if mdev, mdev->ops and mdev->ops->req_queue are all set. */
	return v4l2_dev->mdev && v4l2_dev->mdev->ops &&
	       v4l2_dev->mdev->ops->req_queue;
}

/* Helper macros to iterate over all subdevs. */

/**
 * v4l2_device_for_each_subdev - Helper macro that iterates over all
 *	sub-devices of a given &v4l2_device.
 *
 * @sd: pointer that will be filled by the macro with all
 *	&struct v4l2_subdev pointer used as an iterator by the loop.
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 *
 * This macro iterates over all sub-devices owned by the @v4l2_dev device.
 * It acts as a for loop iterator and executes the next statement with
 * the @sd variable pointing to each sub-device in turn.
 */
#define v4l2_device_for_each_subdev(sd, v4l2_dev) \
	list_for_each_entry(sd, &(v4l2_dev)->subdevs, list)

/**
 * __v4l2_device_call_subdevs_p - Calls the specified operation for
 *	all subdevs matching the condition.
 *
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @sd: pointer that will be filled by the macro with all
 *	&struct v4l2_subdev pointer used as an iterator by the loop.
 * @cond: condition to be matched
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @cond matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * Sub-devices whose ops->@o group or ops->@o->@f callback is NULL are
 * skipped. The value returned by @f is discarded, so any errors are
 * ignored.
 *
 * Note: subdevs cannot be added or deleted while walking
 * the subdevs list.
 */
#define __v4l2_device_call_subdevs_p(v4l2_dev, sd, cond, o, f, args...) \
	do { \
		list_for_each_entry((sd), &(v4l2_dev)->subdevs, list) \
			if ((cond) && (sd)->ops->o && (sd)->ops->o->f) \
				(sd)->ops->o->f((sd) , ##args); \
	} while (0)

/**
 * __v4l2_device_call_subdevs - Calls the specified operation for
 *	all subdevs matching the condition.
*
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @cond: condition to be matched
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @cond matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * Declares its own iterator and forwards to __v4l2_device_call_subdevs_p().
 *
 * Ignore any errors.
 *
 * Note: subdevs cannot be added or deleted while walking
 * the subdevs list.
 */
#define __v4l2_device_call_subdevs(v4l2_dev, cond, o, f, args...) \
	do { \
		struct v4l2_subdev *__sd; \
		\
		__v4l2_device_call_subdevs_p(v4l2_dev, __sd, cond, o, \
						f , ##args); \
	} while (0)

/**
 * __v4l2_device_call_subdevs_until_err_p - Calls the specified operation for
 *	all subdevs matching the condition.
 *
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @sd: pointer that will be filled by the macro with all
 *	&struct v4l2_subdev sub-devices associated with @v4l2_dev.
 * @cond: condition to be matched
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @cond matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * Sub-devices whose ops->@o group or ops->@o->@f callback is NULL are
 * skipped. The walk stops at the first call that returns a value other
 * than 0 or ``-ENOIOCTLCMD``; a final ``-ENOIOCTLCMD`` is reported as 0.
 *
 * Return:
 *
 * If the operation returns an error other than 0 or ``-ENOIOCTLCMD``
 * for any subdevice, then abort and return with that error code, zero
 * otherwise.
 *
 * Note: subdevs cannot be added or deleted while walking
 * the subdevs list.
 */
#define __v4l2_device_call_subdevs_until_err_p(v4l2_dev, sd, cond, o, f, args...) \
({ \
	long __err = 0; \
	\
	list_for_each_entry((sd), &(v4l2_dev)->subdevs, list) { \
		if ((cond) && (sd)->ops->o && (sd)->ops->o->f) \
			__err = (sd)->ops->o->f((sd) , ##args); \
		if (__err && __err != -ENOIOCTLCMD) \
			break; \
	} \
	(__err == -ENOIOCTLCMD) ? 0 : __err; \
})

/**
 * __v4l2_device_call_subdevs_until_err - Calls the specified operation for
 *	all subdevs matching the condition.
 *
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @cond: condition to be matched
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @cond matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * Declares its own iterator and forwards to
 * __v4l2_device_call_subdevs_until_err_p().
 *
 * Return:
 *
 * If the operation returns an error other than 0 or ``-ENOIOCTLCMD``
 * for any subdevice, then abort and return with that error code,
 * zero otherwise.
 *
 * Note: subdevs cannot be added or deleted while walking
 * the subdevs list.
 */
#define __v4l2_device_call_subdevs_until_err(v4l2_dev, cond, o, f, args...) \
({ \
	struct v4l2_subdev *__sd; \
	__v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, cond, o, \
						f , ##args); \
})

/**
 * v4l2_device_call_all - Calls the specified operation for
 *	all subdevs matching the &v4l2_subdev.grp_id, as assigned
 *	by the bridge driver.
 *
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @grpid: &struct v4l2_subdev->grp_id group ID to match.
 *	    Use 0 to match them all.
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @grpid matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * A sub-device matches when @grpid is 0 or equals its grp_id.
 *
 * Ignore any errors.
 *
 * Note: subdevs cannot be added or deleted while walking
 * the subdevs list.
 */
#define v4l2_device_call_all(v4l2_dev, grpid, o, f, args...) \
	do { \
		struct v4l2_subdev *__sd; \
		\
		__v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
			(grpid) == 0 || __sd->grp_id == (grpid), o, f , \
			##args); \
	} while (0)

/**
 * v4l2_device_call_until_err - Calls the specified operation for
 *	all subdevs matching the &v4l2_subdev.grp_id, as assigned
 *	by the bridge driver, until an error occurs.
 *
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @grpid: &struct v4l2_subdev->grp_id group ID to match.
 *	   Use 0 to match them all.
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @grpid matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * A sub-device matches when @grpid is 0 or equals its grp_id.
 *
 * Return:
 *
 * If the operation returns an error other than 0 or ``-ENOIOCTLCMD``
 * for any subdevice, then abort and return with that error code,
 * zero otherwise.
 *
 * Note: subdevs cannot be added or deleted while walking
 * the subdevs list.
 */
#define v4l2_device_call_until_err(v4l2_dev, grpid, o, f, args...) \
({ \
	struct v4l2_subdev *__sd; \
	__v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
			(grpid) == 0 || __sd->grp_id == (grpid), o, f , \
			##args); \
})

/**
 * v4l2_device_mask_call_all - Calls the specified operation for
 *	all subdevices where a group ID matches a specified bitmask.
 *
 * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over.
 * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id
 *	    group ID to be matched. Use 0 to match them all.
 * @o: name of the element at &struct v4l2_subdev_ops that contains @f.
 *     Each element there groups a set of operations functions.
 * @f: operation function that will be called if @cond matches.
 *     The operation functions are defined in groups, according to
 *     each element at &struct v4l2_subdev_ops.
 * @args: arguments for @f.
 *
 * Ignore any errors.
* * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_mask_call_all(v4l2_dev, grpmsk, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ f , ##args); \ } while (0) /** * v4l2_device_mask_call_until_err - Calls the specified operation for * all subdevices where a group ID matches a specified bitmask. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_mask_call_until_err(v4l2_dev, grpmsk, o, f, args...) \ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ f , ##args); \ }) /** * v4l2_device_has_op - checks if any subdev with matching grpid has a * given ops. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. 
*/ #define v4l2_device_has_op(v4l2_dev, grpid, o, f) \ ({ \ struct v4l2_subdev *__sd; \ bool __result = false; \ list_for_each_entry(__sd, &(v4l2_dev)->subdevs, list) { \ if ((grpid) && __sd->grp_id != (grpid)) \ continue; \ if (v4l2_subdev_has_op(__sd, o, f)) { \ __result = true; \ break; \ } \ } \ __result; \ }) /** * v4l2_device_mask_has_op - checks if any subdev with matching group * mask has a given ops. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. */ #define v4l2_device_mask_has_op(v4l2_dev, grpmsk, o, f) \ ({ \ struct v4l2_subdev *__sd; \ bool __result = false; \ list_for_each_entry(__sd, &(v4l2_dev)->subdevs, list) { \ if ((grpmsk) && !(__sd->grp_id & (grpmsk))) \ continue; \ if (v4l2_subdev_has_op(__sd, o, f)) { \ __result = true; \ break; \ } \ } \ __result; \ }) #endif |
9 1 8 7 2 11 22 9 9 13 8 8 137 112 148 148 148 148 155 155 155 115 148 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
*/ #include "device.h" #include "peer.h" #include "socket.h" #include "queueing.h" #include "messages.h" #include <linux/ctype.h> #include <linux/net.h> #include <linux/if_vlan.h> #include <linux/if_ether.h> #include <linux/inetdevice.h> #include <net/udp_tunnel.h> #include <net/ipv6.h> static int send4(struct wg_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache) { struct flowi4 fl = { .saddr = endpoint->src4.s_addr, .daddr = endpoint->addr4.sin_addr.s_addr, .fl4_dport = endpoint->addr4.sin_port, .flowi4_mark = wg->fwmark, .flowi4_proto = IPPROTO_UDP }; struct rtable *rt = NULL; struct sock *sock; int ret = 0; skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; rcu_read_lock_bh(); sock = rcu_dereference_bh(wg->sock4); if (unlikely(!sock)) { ret = -ENONET; goto err; } fl.fl4_sport = inet_sk(sock)->inet_sport; if (cache) rt = dst_cache_get_ip4(cache, &fl.saddr); if (!rt) { security_sk_classify_flow(sock, flowi4_to_flowi_common(&fl)); if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { endpoint->src4.s_addr = 0; endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); } rt = ip_route_output_flow(sock_net(sock), &fl, sock); if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) { endpoint->src4.s_addr = 0; endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); if (!IS_ERR(rt)) ip_rt_put(rt); rt = ip_route_output_flow(sock_net(sock), &fl, sock); } if (IS_ERR(rt)) { ret = PTR_ERR(rt); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; } if (cache) dst_cache_set_ip4(cache, &rt->dst, fl.saddr); } skb->ignore_df = 1; udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, fl.fl4_dport, false, false); goto out; err: kfree_skb(skb); out: rcu_read_unlock_bh(); return 
ret; } static int send6(struct wg_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache) { #if IS_ENABLED(CONFIG_IPV6) struct flowi6 fl = { .saddr = endpoint->src6, .daddr = endpoint->addr6.sin6_addr, .fl6_dport = endpoint->addr6.sin6_port, .flowi6_mark = wg->fwmark, .flowi6_oif = endpoint->addr6.sin6_scope_id, .flowi6_proto = IPPROTO_UDP /* TODO: addr->sin6_flowinfo */ }; struct dst_entry *dst = NULL; struct sock *sock; int ret = 0; skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; rcu_read_lock_bh(); sock = rcu_dereference_bh(wg->sock6); if (unlikely(!sock)) { ret = -ENONET; goto err; } fl.fl6_sport = inet_sk(sock)->inet_sport; if (cache) dst = dst_cache_get_ip6(cache, &fl.saddr); if (!dst) { security_sk_classify_flow(sock, flowi6_to_flowi_common(&fl)); if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { endpoint->src6 = fl.saddr = in6addr_any; if (cache) dst_cache_reset(cache); } dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, NULL); if (IS_ERR(dst)) { ret = PTR_ERR(dst); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; } if (cache) dst_cache_set_ip6(cache, dst, &fl.saddr); } skb->ignore_df = 1; udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, ip6_dst_hoplimit(dst), 0, fl.fl6_sport, fl.fl6_dport, false); goto out; err: kfree_skb(skb); out: rcu_read_unlock_bh(); return ret; #else kfree_skb(skb); return -EAFNOSUPPORT; #endif } int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) { size_t skb_len = skb->len; int ret = -EAFNOSUPPORT; read_lock_bh(&peer->endpoint_lock); if (peer->endpoint.addr.sa_family == AF_INET) ret = send4(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache); else if (peer->endpoint.addr.sa_family == AF_INET6) ret = send6(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache); else dev_kfree_skb(skb); 
if (likely(!ret)) peer->tx_bytes += skb_len; read_unlock_bh(&peer->endpoint_lock); return ret; } int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, size_t len, u8 ds) { struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); if (unlikely(!skb)) return -ENOMEM; skb_reserve(skb, SKB_HEADER_LEN); skb_set_inner_network_header(skb, 0); skb_put_data(skb, buffer, len); return wg_socket_send_skb_to_peer(peer, skb, ds); } int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, struct sk_buff *in_skb, void *buffer, size_t len) { int ret = 0; struct sk_buff *skb; struct endpoint endpoint; if (unlikely(!in_skb)) return -EINVAL; ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); if (unlikely(ret < 0)) return ret; skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); if (unlikely(!skb)) return -ENOMEM; skb_reserve(skb, SKB_HEADER_LEN); skb_set_inner_network_header(skb, 0); skb_put_data(skb, buffer, len); if (endpoint.addr.sa_family == AF_INET) ret = send4(wg, skb, &endpoint, 0, NULL); else if (endpoint.addr.sa_family == AF_INET6) ret = send6(wg, skb, &endpoint, 0, NULL); /* No other possibilities if the endpoint is valid, which it is, * as we checked above. 
*/ return ret; } int wg_socket_endpoint_from_skb(struct endpoint *endpoint, const struct sk_buff *skb) { memset(endpoint, 0, sizeof(*endpoint)); if (skb->protocol == htons(ETH_P_IP)) { endpoint->addr4.sin_family = AF_INET; endpoint->addr4.sin_port = udp_hdr(skb)->source; endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; endpoint->src_if4 = skb->skb_iif; } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( &ipv6_hdr(skb)->saddr, skb->skb_iif); endpoint->src6 = ipv6_hdr(skb)->daddr; } else { return -EINVAL; } return 0; } static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) { return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && a->addr4.sin_port == b->addr4.sin_port && a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || (a->addr.sa_family == AF_INET6 && b->addr.sa_family == AF_INET6 && a->addr6.sin6_port == b->addr6.sin6_port && ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && ipv6_addr_equal(&a->src6, &b->src6)) || unlikely(!a->addr.sa_family && !b->addr.sa_family); } void wg_socket_set_peer_endpoint(struct wg_peer *peer, const struct endpoint *endpoint) { /* First we check unlocked, in order to optimize, since it's pretty rare * that an endpoint will change. If we happen to be mid-write, and two * CPUs wind up writing the same thing or something slightly different, * it doesn't really matter much either. 
*/ if (endpoint_eq(endpoint, &peer->endpoint)) return; write_lock_bh(&peer->endpoint_lock); if (endpoint->addr.sa_family == AF_INET) { peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; peer->endpoint.src_if4 = endpoint->src_if4; } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { peer->endpoint.addr6 = endpoint->addr6; peer->endpoint.src6 = endpoint->src6; } else { goto out; } dst_cache_reset(&peer->endpoint_cache); out: write_unlock_bh(&peer->endpoint_lock); } void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, const struct sk_buff *skb) { struct endpoint endpoint; if (!wg_socket_endpoint_from_skb(&endpoint, skb)) wg_socket_set_peer_endpoint(peer, &endpoint); } void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } static int wg_receive(struct sock *sk, struct sk_buff *skb) { struct wg_device *wg; if (unlikely(!sk)) goto err; wg = sk->sk_user_data; if (unlikely(!wg)) goto err; skb_mark_not_on_list(skb); wg_packet_receive(wg, skb); return 0; err: kfree_skb(skb); return 0; } static void sock_free(struct sock *sock) { if (unlikely(!sock)) return; sk_clear_memalloc(sock); udp_tunnel_sock_release(sock->sk_socket); } static void set_sock_opts(struct socket *sock) { sock->sk->sk_allocation = GFP_ATOMIC; sock->sk->sk_sndbuf = INT_MAX; sk_set_memalloc(sock->sk); } int wg_socket_init(struct wg_device *wg, u16 port) { struct net *net; int ret; struct udp_tunnel_sock_cfg cfg = { .sk_user_data = wg, .encap_type = 1, .encap_rcv = wg_receive }; struct socket *new4 = NULL, *new6 = NULL; struct udp_port_cfg port4 = { .family = AF_INET, .local_ip.s_addr = htonl(INADDR_ANY), .local_udp_port = htons(port), .use_udp_checksums = true }; #if IS_ENABLED(CONFIG_IPV6) int retries = 0; struct udp_port_cfg port6 = { .family = AF_INET6, 
.local_ip6 = IN6ADDR_ANY_INIT, .use_udp6_tx_checksums = true, .use_udp6_rx_checksums = true, .ipv6_v6only = true }; #endif rcu_read_lock(); net = rcu_dereference(wg->creating_net); net = net ? maybe_get_net(net) : NULL; rcu_read_unlock(); if (unlikely(!net)) return -ENONET; #if IS_ENABLED(CONFIG_IPV6) retry: #endif ret = udp_sock_create(net, &port4, &new4); if (ret < 0) { pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); goto out; } set_sock_opts(new4); setup_udp_tunnel_sock(net, new4, &cfg); #if IS_ENABLED(CONFIG_IPV6) if (ipv6_mod_enabled()) { port6.local_udp_port = inet_sk(new4->sk)->inet_sport; ret = udp_sock_create(net, &port6, &new6); if (ret < 0) { udp_tunnel_sock_release(new4); if (ret == -EADDRINUSE && !port && retries++ < 100) goto retry; pr_err("%s: Could not create IPv6 socket\n", wg->dev->name); goto out; } set_sock_opts(new6); setup_udp_tunnel_sock(net, new6, &cfg); } #endif wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); ret = 0; out: put_net(net); return ret; } void wg_socket_reinit(struct wg_device *wg, struct sock *new4, struct sock *new6) { struct sock *old4, *old6; mutex_lock(&wg->socket_update_lock); old4 = rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock)); old6 = rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock)); rcu_assign_pointer(wg->sock4, new4); rcu_assign_pointer(wg->sock6, new6); if (new4) wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); synchronize_net(); sock_free(old4); sock_free(old6); } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of the Linux kernel. * * Copyright (c) 2011-2014, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> */ #ifndef ASM_X86_ARCHRANDOM_H #define ASM_X86_ARCHRANDOM_H #include <asm/processor.h> #include <asm/cpufeature.h> #define RDRAND_RETRY_LOOPS 10 /* Unconditional execution of RDRAND and RDSEED */ static inline bool __must_check rdrand_long(unsigned long *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { asm volatile("rdrand %[out]" CC_SET(c) : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); return false; } static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; asm volatile("rdseed %[out]" CC_SET(c) : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } /* * These are the generic interfaces; they must not be declared if the * stubs in <linux/random.h> are to be invoked. */ static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) { return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0; } static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) { return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 1 : 0; } #ifndef CONFIG_UML void x86_init_rdrand(struct cpuinfo_x86 *c); #endif #endif /* ASM_X86_ARCHRANDOM_H */ |
758 772 734 647 2 544 9 518 59 5 547 780 772 10 1 32 503 1 500 18 18 1 58 473 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 | // SPDX-License-Identifier: GPL-2.0-only /* * lib/parser.c - simple parser for mount, etc. options. 
*/ #include <linux/ctype.h> #include <linux/types.h> #include <linux/export.h> #include <linux/kstrtox.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/string.h> /* * max size needed by different bases to express U64 * HEX: "0xFFFFFFFFFFFFFFFF" --> 18 * DEC: "18446744073709551615" --> 20 * OCT: "01777777777777777777777" --> 23 * pick the max one to define NUMBER_BUF_LEN */ #define NUMBER_BUF_LEN 24 /** * match_one - Determines if a string matches a simple pattern * @s: the string to examine for presence of the pattern * @p: the string containing the pattern * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match * locations. * * Description: Determines if the pattern @p is present in string @s. Can only * match extremely simple token=arg style patterns. If the pattern is found, * the location(s) of the arguments will be returned in the @args array. */ static int match_one(char *s, const char *p, substring_t args[]) { char *meta; int argc = 0; if (!p) return 1; while(1) { int len = -1; meta = strchr(p, '%'); if (!meta) return strcmp(p, s) == 0; if (strncmp(p, s, meta-p)) return 0; s += meta - p; p = meta + 1; if (isdigit(*p)) len = simple_strtoul(p, (char **) &p, 10); else if (*p == '%') { if (*s++ != '%') return 0; p++; continue; } if (argc >= MAX_OPT_ARGS) return 0; args[argc].from = s; switch (*p++) { case 's': { size_t str_len = strlen(s); if (str_len == 0) return 0; if (len == -1 || len > str_len) len = str_len; args[argc].to = s + len; break; } case 'd': simple_strtol(s, &args[argc].to, 0); goto num; case 'u': simple_strtoul(s, &args[argc].to, 0); goto num; case 'o': simple_strtoul(s, &args[argc].to, 8); goto num; case 'x': simple_strtoul(s, &args[argc].to, 16); num: if (args[argc].to == args[argc].from) return 0; break; default: return 0; } s = args[argc].to; argc++; } } /** * match_token - Find a token (and optional args) in a string * @s: the string to examine for token/argument pairs * @table: match_table_t 
describing the set of allowed option tokens and the * arguments that may be associated with them. Must be terminated with a * &struct match_token whose pattern is set to the NULL pointer. * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match * locations. * * Description: Detects which if any of a set of token strings has been passed * to it. Tokens can include up to %MAX_OPT_ARGS instances of basic c-style * format identifiers which will be taken into account when matching the * tokens, and whose locations will be returned in the @args array. */ int match_token(char *s, const match_table_t table, substring_t args[]) { const struct match_token *p; for (p = table; !match_one(s, p->pattern, args) ; p++) ; return p->token; } EXPORT_SYMBOL(match_token); /** * match_number - scan a number in the given base from a substring_t * @s: substring to be scanned * @result: resulting integer on success * @base: base to use when converting string * * Description: Given a &substring_t and a base, attempts to parse the substring * as a number in that base. * * Return: On success, sets @result to the integer represented by the * string and returns 0. Returns -EINVAL or -ERANGE on failure. */ static int match_number(substring_t *s, int *result, int base) { char *endp; char buf[NUMBER_BUF_LEN]; int ret; long val; if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) return -ERANGE; ret = 0; val = simple_strtol(buf, &endp, base); if (endp == buf) ret = -EINVAL; else if (val < (long)INT_MIN || val > (long)INT_MAX) ret = -ERANGE; else *result = (int) val; return ret; } /** * match_u64int - scan a number in the given base from a substring_t * @s: substring to be scanned * @result: resulting u64 on success * @base: base to use when converting string * * Description: Given a &substring_t and a base, attempts to parse the substring * as a number in that base. * * Return: On success, sets @result to the integer represented by the * string and returns 0. 
Returns -EINVAL or -ERANGE on failure. */ static int match_u64int(substring_t *s, u64 *result, int base) { char buf[NUMBER_BUF_LEN]; int ret; u64 val; if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) return -ERANGE; ret = kstrtoull(buf, base, &val); if (!ret) *result = val; return ret; } /** * match_int - scan a decimal representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as a decimal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } EXPORT_SYMBOL(match_int); /** * match_uint - scan a decimal representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as a decimal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_uint(substring_t *s, unsigned int *result) { char buf[NUMBER_BUF_LEN]; if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) return -ERANGE; return kstrtouint(buf, 10, result); } EXPORT_SYMBOL(match_uint); /** * match_u64 - scan a decimal representation of a u64 from * a substring_t * @s: substring_t to be scanned * @result: resulting unsigned long long on success * * Description: Attempts to parse the &substring_t @s as a long decimal * integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. 
*/ int match_u64(substring_t *s, u64 *result) { return match_u64int(s, result, 0); } EXPORT_SYMBOL(match_u64); /** * match_octal - scan an octal representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as an octal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } EXPORT_SYMBOL(match_octal); /** * match_hex - scan a hex representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as a hexadecimal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } EXPORT_SYMBOL(match_hex); /** * match_wildcard - parse if a string matches given wildcard pattern * @pattern: wildcard pattern * @str: the string to be parsed * * Description: Parse the string @str to check if matches wildcard * pattern @pattern. The pattern may contain two types of wildcards: * '*' - matches zero or more characters * '?' - matches one character * * Return: If the @str matches the @pattern, return true, else return false. 
*/ bool match_wildcard(const char *pattern, const char *str) { const char *s = str; const char *p = pattern; bool star = false; while (*s) { switch (*p) { case '?': s++; p++; break; case '*': star = true; str = s; if (!*++p) return true; pattern = p; break; default: if (*s == *p) { s++; p++; } else { if (!star) return false; str++; s = str; p = pattern; } break; } } if (*p == '*') ++p; return !*p; } EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy - Copy the characters from a substring_t to a sized buffer * @dest: where to copy to * @src: &substring_t to copy * @size: size of destination buffer * * Description: Copy the characters in &substring_t @src to the * c-style string @dest. Copy no more than @size - 1 characters, plus * the terminating NUL. * * Return: length of @src. */ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) { size_t ret = src->to - src->from; if (size) { size_t len = ret >= size ? size - 1 : ret; memcpy(dest, src->from, len); dest[len] = '\0'; } return ret; } EXPORT_SYMBOL(match_strlcpy); /** * match_strdup - allocate a new string with the contents of a substring_t * @s: &substring_t to copy * * Description: Allocates and returns a string filled with the contents of * the &substring_t @s. The caller is responsible for freeing the returned * string with kfree(). * * Return: the address of the newly allocated NUL-terminated string or * %NULL on error. */ char *match_strdup(const substring_t *s) { return kmemdup_nul(s->from, s->to - s->from, GFP_KERNEL); } EXPORT_SYMBOL(match_strdup); |
1 1 1 1 1 2 2 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 | // SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl> * Copyright (C) 2004, 05 Ralf Baechle DL5RB 
<ralf@linux-mips.org> * Copyright (C) 2004, 05 Thomas Osterried DL9SAU <thomas@x-berg.in-berlin.de> */ #include <linux/module.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/crc16.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/major.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/jiffies.h> #include <linux/refcount.h> #include <net/ax25.h> #define AX_MTU 236 /* some arch define END as assembly function ending, just undef it */ #undef END /* SLIP/KISS protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ #define ESC_END 0334 /* ESC ESC_END means END 'data' */ #define ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ struct mkiss { struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ /* These are pointers to the malloc()ed frame buffers. */ spinlock_t buflock;/* lock for rbuf and xbuf */ unsigned char *rbuff; /* receiver buffer */ int rcount; /* received chars counter */ unsigned char *xbuff; /* transmitter buffer */ unsigned char *xhead; /* pointer to next byte to XMIT */ int xleft; /* bytes left in XMIT queue */ /* Detailed SLIP statistics. */ int mtu; /* Our mtu (to spot changes!) */ int buffsize; /* Max buffers sizes */ unsigned long flags; /* Flag values/ mode etc */ /* long req'd: used by set_bit --RR */ #define AXF_INUSE 0 /* Channel in use */ #define AXF_ESCAPE 1 /* ESC received */ #define AXF_ERROR 2 /* Parity, etc. error */ #define AXF_KEEPTEST 3 /* Keepalive test flag */ #define AXF_OUTWAIT 4 /* is outpacket was flag */ int mode; int crcmode; /* MW: for FlexNet, SMACK etc. 
*/ int crcauto; /* CRC auto mode */ #define CRC_MODE_NONE 0 #define CRC_MODE_FLEX 1 #define CRC_MODE_SMACK 2 #define CRC_MODE_FLEX_TEST 3 #define CRC_MODE_SMACK_TEST 4 refcount_t refcnt; struct completion dead; }; /*---------------------------------------------------------------------------*/ static const unsigned short crc_flex_table[] = { 0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38, 0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770, 0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9, 0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1, 0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a, 0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672, 0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb, 0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3, 0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c, 0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574, 0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd, 0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5, 0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e, 0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476, 0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf, 0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7, 0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30, 0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378, 0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1, 0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9, 0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32, 0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a, 0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3, 0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb, 0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34, 0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 
0x317c, 0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5, 0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd, 0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36, 0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e, 0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7, 0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff }; static unsigned short calc_crc_flex(unsigned char *cp, int size) { unsigned short crc = 0xffff; while (size--) crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; return crc; } static int check_crc_flex(unsigned char *cp, int size) { unsigned short crc = 0xffff; if (size < 3) return -1; while (size--) crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; if ((crc & 0xffff) != 0x7070) return -1; return 0; } static int check_crc_16(unsigned char *cp, int size) { unsigned short crc = 0x0000; if (size < 3) return -1; crc = crc16(0, cp, size); if (crc != 0x0000) return -1; return 0; } /* * Standard encapsulation */ static int kiss_esc(unsigned char *s, unsigned char *d, int len) { unsigned char *ptr = d; unsigned char c; /* * Send an initial END character to flush out any data that may have * accumulated in the receiver due to line noise. 
*/ *ptr++ = END; while (len-- > 0) { switch (c = *s++) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } /* * MW: * OK its ugly, but tell me a better solution without copying the * packet to a temporary buffer :-) */ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, int len) { unsigned char *ptr = d; unsigned char c=0; *ptr++ = END; while (len > 0) { if (len > 2) c = *s++; else if (len > 1) c = crc >> 8; else c = crc & 0xff; len--; switch (c) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } /* Send one completely decapsulated AX.25 packet to the AX.25 layer. */ static void ax_bump(struct mkiss *ax) { struct sk_buff *skb; int count; spin_lock_bh(&ax->buflock); if (ax->rbuff[0] > 0x0f) { if (ax->rbuff[0] & 0x80) { if (check_crc_16(ax->rbuff, ax->rcount) < 0) { ax->dev->stats.rx_errors++; spin_unlock_bh(&ax->buflock); return; } if (ax->crcmode != CRC_MODE_SMACK && ax->crcauto) { printk(KERN_INFO "mkiss: %s: Switching to crc-smack\n", ax->dev->name); ax->crcmode = CRC_MODE_SMACK; } ax->rcount -= 2; *ax->rbuff &= ~0x80; } else if (ax->rbuff[0] & 0x20) { if (check_crc_flex(ax->rbuff, ax->rcount) < 0) { ax->dev->stats.rx_errors++; spin_unlock_bh(&ax->buflock); return; } if (ax->crcmode != CRC_MODE_FLEX && ax->crcauto) { printk(KERN_INFO "mkiss: %s: Switching to crc-flexnet\n", ax->dev->name); ax->crcmode = CRC_MODE_FLEX; } ax->rcount -= 2; /* * dl9sau bugfix: the trailling two bytes flexnet crc * will not be passed to the kernel. 
thus we have to * correct the kissparm signature, because it indicates * a crc but there's none */ *ax->rbuff &= ~0x20; } } count = ax->rcount; if ((skb = dev_alloc_skb(count)) == NULL) { printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n", ax->dev->name); ax->dev->stats.rx_dropped++; spin_unlock_bh(&ax->buflock); return; } skb_put_data(skb, ax->rbuff, count); skb->protocol = ax25_type_trans(skb, ax->dev); netif_rx(skb); ax->dev->stats.rx_packets++; ax->dev->stats.rx_bytes += count; spin_unlock_bh(&ax->buflock); } static void kiss_unesc(struct mkiss *ax, unsigned char s) { switch (s) { case END: /* drop keeptest bit = VSV */ if (test_bit(AXF_KEEPTEST, &ax->flags)) clear_bit(AXF_KEEPTEST, &ax->flags); if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2)) ax_bump(ax); clear_bit(AXF_ESCAPE, &ax->flags); ax->rcount = 0; return; case ESC: set_bit(AXF_ESCAPE, &ax->flags); return; case ESC_ESC: if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) s = ESC; break; case ESC_END: if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) s = END; break; } spin_lock_bh(&ax->buflock); if (!test_bit(AXF_ERROR, &ax->flags)) { if (ax->rcount < ax->buffsize) { ax->rbuff[ax->rcount++] = s; spin_unlock_bh(&ax->buflock); return; } ax->dev->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } spin_unlock_bh(&ax->buflock); } static int ax_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr_ax25 *sa = addr; netif_tx_lock_bh(dev); netif_addr_lock(dev); __dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); return 0; } /*---------------------------------------------------------------------------*/ static void ax_changedmtu(struct mkiss *ax) { struct net_device *dev = ax->dev; unsigned char *xbuff, *rbuff, *oxbuff, *orbuff; int len; len = dev->mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len 
< 576 * 2) len = 576 * 2; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); if (xbuff == NULL || rbuff == NULL) { printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, " "MTU change cancelled.\n", ax->dev->name); dev->mtu = ax->mtu; kfree(xbuff); kfree(rbuff); return; } spin_lock_bh(&ax->buflock); oxbuff = ax->xbuff; ax->xbuff = xbuff; orbuff = ax->rbuff; ax->rbuff = rbuff; if (ax->xleft) { if (ax->xleft <= len) { memcpy(ax->xbuff, ax->xhead, ax->xleft); } else { ax->xleft = 0; dev->stats.tx_dropped++; } } ax->xhead = ax->xbuff; if (ax->rcount) { if (ax->rcount <= len) { memcpy(ax->rbuff, orbuff, ax->rcount); } else { ax->rcount = 0; dev->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } } ax->mtu = dev->mtu + 73; ax->buffsize = len; spin_unlock_bh(&ax->buflock); kfree(oxbuff); kfree(orbuff); } /* Encapsulate one AX.25 packet and stuff into a TTY queue. */ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) { struct mkiss *ax = netdev_priv(dev); unsigned char *p; int actual, count; if (ax->mtu != ax->dev->mtu + 73) /* Someone has been ifconfigging */ ax_changedmtu(ax); if (len > ax->mtu) { /* Sigh, shouldn't occur BUT ... */ printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name); dev->stats.tx_dropped++; netif_start_queue(dev); return; } p = icp; spin_lock_bh(&ax->buflock); if ((*p & 0x0f) != 0) { /* Configuration Command (kissparms(1). * Protocol spec says: never append CRC. * This fixes a very old bug in the linux * kiss driver. -- dl9sau */ switch (*p & 0xff) { case 0x85: /* command from userspace especially for us, * not for delivery to the tnc */ if (len > 1) { int cmd = (p[1] & 0xff); switch(cmd) { case 3: ax->crcmode = CRC_MODE_SMACK; break; case 2: ax->crcmode = CRC_MODE_FLEX; break; case 1: ax->crcmode = CRC_MODE_NONE; break; case 0: default: ax->crcmode = CRC_MODE_SMACK_TEST; cmd = 0; } ax->crcauto = (cmd ? 
0 : 1); printk(KERN_INFO "mkiss: %s: crc mode set to %d\n", ax->dev->name, cmd); } spin_unlock_bh(&ax->buflock); netif_start_queue(dev); return; default: count = kiss_esc(p, ax->xbuff, len); } } else { unsigned short crc; switch (ax->crcmode) { case CRC_MODE_SMACK_TEST: ax->crcmode = CRC_MODE_FLEX_TEST; printk(KERN_INFO "mkiss: %s: Trying crc-smack\n", ax->dev->name); fallthrough; case CRC_MODE_SMACK: *p |= 0x80; crc = swab16(crc16(0, p, len)); count = kiss_esc_crc(p, ax->xbuff, crc, len+2); break; case CRC_MODE_FLEX_TEST: ax->crcmode = CRC_MODE_NONE; printk(KERN_INFO "mkiss: %s: Trying crc-flexnet\n", ax->dev->name); fallthrough; case CRC_MODE_FLEX: *p |= 0x20; crc = calc_crc_flex(p, len); count = kiss_esc_crc(p, ax->xbuff, crc, len+2); break; default: count = kiss_esc(p, ax->xbuff, len); } } spin_unlock_bh(&ax->buflock); set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); actual = ax->tty->ops->write(ax->tty, ax->xbuff, count); dev->stats.tx_packets++; dev->stats.tx_bytes += actual; netif_trans_update(ax->dev); ax->xleft = count - actual; ax->xhead = ax->xbuff + actual; } /* Encapsulate an AX.25 packet and kick it into a TTY queue. */ static netdev_tx_t ax_xmit(struct sk_buff *skb, struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (skb->protocol == htons(ETH_P_IP)) return ax25_ip_xmit(skb); if (!netif_running(dev)) { printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name); return NETDEV_TX_BUSY; } if (netif_queue_stopped(dev)) { /* * May be we must check transmitter timeout here ? * 14 Oct 1994 Dmitry Gorodchanin. */ if (time_before(jiffies, dev_trans_start(dev) + 20 * HZ)) { /* 20 sec timeout not reached */ return NETDEV_TX_BUSY; } printk(KERN_ERR "mkiss: %s: transmit timed out, %s?\n", dev->name, (tty_chars_in_buffer(ax->tty) || ax->xleft) ? "bad line quality" : "driver error"); ax->xleft = 0; clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_start_queue(dev); } /* We were not busy, so we are now... 
:-) */ netif_stop_queue(dev); ax_encaps(dev, skb->data, skb->len); kfree_skb(skb); return NETDEV_TX_OK; } static int ax_open_dev(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (ax->tty == NULL) return -ENODEV; return 0; } /* Open the low-level part of the AX25 channel. Easy! */ static int ax_open(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); unsigned long len; if (ax->tty == NULL) return -ENODEV; /* * Allocate the frame buffers: * * rbuff Receive buffer. * xbuff Transmit buffer. */ len = dev->mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; if ((ax->rbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL) goto norbuff; if ((ax->xbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL) goto noxbuff; ax->mtu = dev->mtu + 73; ax->buffsize = len; ax->rcount = 0; ax->xleft = 0; ax->flags &= (1 << AXF_INUSE); /* Clear ESCAPE & ERROR flags */ spin_lock_init(&ax->buflock); return 0; noxbuff: kfree(ax->rbuff); norbuff: return -ENOMEM; } /* Close the low-level part of the AX25 channel. Easy! */ static int ax_close(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (ax->tty) clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_stop_queue(dev); return 0; } static const struct net_device_ops ax_netdev_ops = { .ndo_open = ax_open_dev, .ndo_stop = ax_close, .ndo_start_xmit = ax_xmit, .ndo_set_mac_address = ax_set_mac_address, }; static void ax_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. 
*/ dev->mtu = AX_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_AX25; dev->tx_queue_len = 10; dev->header_ops = &ax25_header_ops; dev->netdev_ops = &ax_netdev_ops; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); dev_addr_set(dev, (u8 *)&ax25_defaddr); dev->flags = IFF_BROADCAST | IFF_MULTICAST; } /* * We have a potential race on dereferencing tty->disc_data, because the tty * layer provides no locking at all - thus one cpu could be running * sixpack_receive_buf while another calls sixpack_close, which zeroes * tty->disc_data and frees the memory that sixpack_receive_buf is using. The * best way to fix this is to use a rwlock in the tty struct, but for now we * use a single global rwlock for all ttys in ppp line discipline. */ static DEFINE_RWLOCK(disc_data_lock); static struct mkiss *mkiss_get(struct tty_struct *tty) { struct mkiss *ax; read_lock(&disc_data_lock); ax = tty->disc_data; if (ax) refcount_inc(&ax->refcnt); read_unlock(&disc_data_lock); return ax; } static void mkiss_put(struct mkiss *ax) { if (refcount_dec_and_test(&ax->refcnt)) complete(&ax->dead); } static int crc_force = 0; /* Can be overridden with insmod */ static int mkiss_open(struct tty_struct *tty) { struct net_device *dev; struct mkiss *ax; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EOPNOTSUPP; dev = alloc_netdev(sizeof(struct mkiss), "ax%d", NET_NAME_UNKNOWN, ax_setup); if (!dev) { err = -ENOMEM; goto out; } ax = netdev_priv(dev); ax->dev = dev; spin_lock_init(&ax->buflock); refcount_set(&ax->refcnt, 1); init_completion(&ax->dead); ax->tty = tty; tty->disc_data = ax; tty->receive_room = 65535; tty_driver_flush_buffer(tty); /* Restore default settings */ dev->type = ARPHRD_AX25; /* Perform the low-level AX25 initialization. 
*/ err = ax_open(ax->dev); if (err) goto out_free_netdev; err = register_netdev(dev); if (err) goto out_free_buffers; /* after register_netdev() - because else printk smashes the kernel */ switch (crc_force) { case 3: ax->crcmode = CRC_MODE_SMACK; printk(KERN_INFO "mkiss: %s: crc mode smack forced.\n", ax->dev->name); break; case 2: ax->crcmode = CRC_MODE_FLEX; printk(KERN_INFO "mkiss: %s: crc mode flexnet forced.\n", ax->dev->name); break; case 1: ax->crcmode = CRC_MODE_NONE; printk(KERN_INFO "mkiss: %s: crc mode disabled.\n", ax->dev->name); break; case 0: default: crc_force = 0; printk(KERN_INFO "mkiss: %s: crc mode is auto.\n", ax->dev->name); ax->crcmode = CRC_MODE_SMACK_TEST; } ax->crcauto = (crc_force ? 0 : 1); netif_start_queue(dev); /* Done. We have linked the TTY line to a channel. */ return 0; out_free_buffers: kfree(ax->rbuff); kfree(ax->xbuff); out_free_netdev: free_netdev(dev); out: return err; } static void mkiss_close(struct tty_struct *tty) { struct mkiss *ax; write_lock_irq(&disc_data_lock); ax = tty->disc_data; tty->disc_data = NULL; write_unlock_irq(&disc_data_lock); if (!ax) return; /* * We have now ensured that nobody can start using ap from now on, but * we have to wait for all existing users to finish. */ if (!refcount_dec_and_test(&ax->refcnt)) wait_for_completion(&ax->dead); /* * Halt the transmit queue so that a new transmit cannot scribble * on our buffers */ netif_stop_queue(ax->dev); unregister_netdev(ax->dev); /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); ax->tty = NULL; free_netdev(ax->dev); } /* Perform I/O control on an active ax25 channel. */ static int mkiss_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct mkiss *ax = mkiss_get(tty); struct net_device *dev; unsigned int tmp, err; /* First make sure we're connected. 
*/ if (ax == NULL) return -ENXIO; dev = ax->dev; switch (cmd) { case SIOCGIFNAME: err = copy_to_user((void __user *) arg, ax->dev->name, strlen(ax->dev->name) + 1) ? -EFAULT : 0; break; case SIOCGIFENCAP: err = put_user(4, (int __user *) arg); break; case SIOCSIFENCAP: if (get_user(tmp, (int __user *) arg)) { err = -EFAULT; break; } ax->mode = tmp; dev->addr_len = AX25_ADDR_LEN; dev->hard_header_len = AX25_KISS_HEADER_LEN + AX25_MAX_HEADER_LEN + 3; dev->type = ARPHRD_AX25; err = 0; break; case SIOCSIFHWADDR: { char addr[AX25_ADDR_LEN]; if (copy_from_user(&addr, (void __user *) arg, AX25_ADDR_LEN)) { err = -EFAULT; break; } netif_tx_lock_bh(dev); __dev_addr_set(dev, addr, AX25_ADDR_LEN); netif_tx_unlock_bh(dev); err = 0; break; } default: err = -ENOIOCTLCMD; } mkiss_put(ax); return err; } /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ static void mkiss_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct mkiss *ax = mkiss_get(tty); if (!ax) return; /* * Argh! mtu change time! - costs us the packet part received * at the change */ if (ax->mtu != ax->dev->mtu + 73) ax_changedmtu(ax); /* Read the characters out of the buffer */ while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; cp++; continue; } kiss_unesc(ax, *cp++); } mkiss_put(ax); tty_unthrottle(tty); } /* * Called by the driver when there's room for more data. If we have * more packets to send, we send them here. 
*/ static void mkiss_write_wakeup(struct tty_struct *tty) { struct mkiss *ax = mkiss_get(tty); int actual; if (!ax) return; if (ax->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); netif_wake_queue(ax->dev); goto out; } actual = tty->ops->write(tty, ax->xhead, ax->xleft); ax->xleft -= actual; ax->xhead += actual; out: mkiss_put(ax); } static struct tty_ldisc_ops ax_ldisc = { .owner = THIS_MODULE, .num = N_AX25, .name = "mkiss", .open = mkiss_open, .close = mkiss_close, .ioctl = mkiss_ioctl, .receive_buf = mkiss_receive_buf, .write_wakeup = mkiss_write_wakeup }; static const char banner[] __initconst = KERN_INFO \ "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n"; static const char msg_regfail[] __initconst = KERN_ERR \ "mkiss: can't register line discipline (err = %d)\n"; static int __init mkiss_init_driver(void) { int status; printk(banner); status = tty_register_ldisc(&ax_ldisc); if (status != 0) printk(msg_regfail, status); return status; } static void __exit mkiss_exit_driver(void) { tty_unregister_ldisc(&ax_ldisc); } MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>"); MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs"); module_param(crc_force, int, 0); MODULE_PARM_DESC(crc_force, "crc [0 = auto | 1 = none | 2 = flexnet | 3 = smack]"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_AX25); module_init(mkiss_init_driver); module_exit(mkiss_exit_driver); |
8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 | // SPDX-License-Identifier: GPL-2.0-only /* * * Author Karsten Keil <kkeil@novell.com> * * Copyright 2008 by Karsten Keil <kkeil@novell.com> */ #include <linux/slab.h> #include <linux/mISDNif.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/sched/cputime.h> #include <linux/signal.h> #include "core.h" static u_int *debug; static inline void _queue_message(struct mISDNstack *st, struct sk_buff *skb) { struct mISDNhead *hh = mISDN_HEAD_P(skb); if (*debug & DEBUG_QUEUE_FUNC) printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n", __func__, hh->prim, hh->id, skb); skb_queue_tail(&st->msgq, skb); if (likely(!test_bit(mISDN_STACK_STOPPED, &st->status))) { test_and_set_bit(mISDN_STACK_WORK, &st->status); wake_up_interruptible(&st->workq); } } static int mISDN_queue_message(struct mISDNchannel *ch, struct sk_buff *skb) { _queue_message(ch->st, skb); return 0; } static struct mISDNchannel * get_channel4id(struct mISDNstack *st, u_int id) { struct mISDNchannel *ch; mutex_lock(&st->lmutex); list_for_each_entry(ch, &st->layer2, list) { if (id == ch->nr) goto unlock; } ch = NULL; unlock: mutex_unlock(&st->lmutex); return ch; } static void send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) { struct sock *sk; struct sk_buff *cskb = NULL; read_lock(&sl->lock); sk_for_each(sk, &sl->head) { if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) cskb = skb_copy(skb, GFP_ATOMIC); if (!cskb) { printk(KERN_WARNING "%s no skb\n", __func__); break; } if 
(!sock_queue_rcv_skb(sk, cskb)) cskb = NULL; } read_unlock(&sl->lock); dev_kfree_skb(cskb); } static void send_layer2(struct mISDNstack *st, struct sk_buff *skb) { struct sk_buff *cskb; struct mISDNhead *hh = mISDN_HEAD_P(skb); struct mISDNchannel *ch; int ret; if (!st) return; mutex_lock(&st->lmutex); if ((hh->id & MISDN_ID_ADDR_MASK) == MISDN_ID_ANY) { /* L2 for all */ list_for_each_entry(ch, &st->layer2, list) { if (list_is_last(&ch->list, &st->layer2)) { cskb = skb; skb = NULL; } else { cskb = skb_copy(skb, GFP_KERNEL); } if (cskb) { ret = ch->send(ch, cskb); if (ret) { if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG "%s ch%d prim(%x) addr(%x)" " err %d\n", __func__, ch->nr, hh->prim, ch->addr, ret); dev_kfree_skb(cskb); } } else { printk(KERN_WARNING "%s ch%d addr %x no mem\n", __func__, ch->nr, ch->addr); goto out; } } } else { list_for_each_entry(ch, &st->layer2, list) { if ((hh->id & MISDN_ID_ADDR_MASK) == ch->addr) { ret = ch->send(ch, skb); if (!ret) skb = NULL; goto out; } } ret = st->dev->teimgr->ctrl(st->dev->teimgr, CHECK_DATA, skb); if (!ret) skb = NULL; else if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG "%s mgr prim(%x) err %d\n", __func__, hh->prim, ret); } out: mutex_unlock(&st->lmutex); dev_kfree_skb(skb); } static inline int send_msg_to_layer(struct mISDNstack *st, struct sk_buff *skb) { struct mISDNhead *hh = mISDN_HEAD_P(skb); struct mISDNchannel *ch; int lm; lm = hh->prim & MISDN_LAYERMASK; if (*debug & DEBUG_QUEUE_FUNC) printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n", __func__, hh->prim, hh->id, skb); if (lm == 0x1) { if (!hlist_empty(&st->l1sock.head)) { __net_timestamp(skb); send_socklist(&st->l1sock, skb); } return st->layer1->send(st->layer1, skb); } else if (lm == 0x2) { if (!hlist_empty(&st->l1sock.head)) send_socklist(&st->l1sock, skb); send_layer2(st, skb); return 0; } else if (lm == 0x4) { ch = get_channel4id(st, hh->id); if (ch) return ch->send(ch, skb); else printk(KERN_WARNING "%s: dev(%s) prim(%x) id(%x) no channel\n", __func__, 
dev_name(&st->dev->dev), hh->prim, hh->id); } else if (lm == 0x8) { WARN_ON(lm == 0x8); ch = get_channel4id(st, hh->id); if (ch) return ch->send(ch, skb); else printk(KERN_WARNING "%s: dev(%s) prim(%x) id(%x) no channel\n", __func__, dev_name(&st->dev->dev), hh->prim, hh->id); } else { /* broadcast not handled yet */ printk(KERN_WARNING "%s: dev(%s) prim %x not delivered\n", __func__, dev_name(&st->dev->dev), hh->prim); } return -ESRCH; } static void do_clear_stack(struct mISDNstack *st) { } static int mISDNStackd(void *data) { struct mISDNstack *st = data; #ifdef MISDN_MSG_STATS u64 utime, stime; #endif int err = 0; sigfillset(¤t->blocked); if (*debug & DEBUG_MSG_THREAD) printk(KERN_DEBUG "mISDNStackd %s started\n", dev_name(&st->dev->dev)); if (st->notify != NULL) { complete(st->notify); st->notify = NULL; } for (;;) { struct sk_buff *skb; if (unlikely(test_bit(mISDN_STACK_STOPPED, &st->status))) { test_and_clear_bit(mISDN_STACK_WORK, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); } else test_and_set_bit(mISDN_STACK_RUNNING, &st->status); while (test_bit(mISDN_STACK_WORK, &st->status)) { skb = skb_dequeue(&st->msgq); if (!skb) { test_and_clear_bit(mISDN_STACK_WORK, &st->status); /* test if a race happens */ skb = skb_dequeue(&st->msgq); if (!skb) continue; test_and_set_bit(mISDN_STACK_WORK, &st->status); } #ifdef MISDN_MSG_STATS st->msg_cnt++; #endif err = send_msg_to_layer(st, skb); if (unlikely(err)) { if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG "%s: %s prim(%x) id(%x) " "send call(%d)\n", __func__, dev_name(&st->dev->dev), mISDN_HEAD_PRIM(skb), mISDN_HEAD_ID(skb), err); dev_kfree_skb(skb); continue; } if (unlikely(test_bit(mISDN_STACK_STOPPED, &st->status))) { test_and_clear_bit(mISDN_STACK_WORK, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); break; } } if (test_bit(mISDN_STACK_CLEARING, &st->status)) { test_and_set_bit(mISDN_STACK_STOPPED, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); 
do_clear_stack(st); test_and_clear_bit(mISDN_STACK_CLEARING, &st->status); test_and_set_bit(mISDN_STACK_RESTART, &st->status); } if (test_and_clear_bit(mISDN_STACK_RESTART, &st->status)) { test_and_clear_bit(mISDN_STACK_STOPPED, &st->status); test_and_set_bit(mISDN_STACK_RUNNING, &st->status); if (!skb_queue_empty(&st->msgq)) test_and_set_bit(mISDN_STACK_WORK, &st->status); } if (test_bit(mISDN_STACK_ABORT, &st->status)) break; if (st->notify != NULL) { complete(st->notify); st->notify = NULL; } #ifdef MISDN_MSG_STATS st->sleep_cnt++; #endif test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status); wait_event_interruptible(st->workq, (st->status & mISDN_STACK_ACTION_MASK)); if (*debug & DEBUG_MSG_THREAD) printk(KERN_DEBUG "%s: %s wake status %08lx\n", __func__, dev_name(&st->dev->dev), st->status); test_and_set_bit(mISDN_STACK_ACTIVE, &st->status); test_and_clear_bit(mISDN_STACK_WAKEUP, &st->status); if (test_bit(mISDN_STACK_STOPPED, &st->status)) { test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); #ifdef MISDN_MSG_STATS st->stopped_cnt++; #endif } } #ifdef MISDN_MSG_STATS printk(KERN_DEBUG "mISDNStackd daemon for %s proceed %d " "msg %d sleep %d stopped\n", dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt, st->stopped_cnt); task_cputime(st->thread, &utime, &stime); printk(KERN_DEBUG "mISDNStackd daemon for %s utime(%llu) stime(%llu)\n", dev_name(&st->dev->dev), utime, stime); printk(KERN_DEBUG "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n", dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw); printk(KERN_DEBUG "mISDNStackd daemon for %s killed now\n", dev_name(&st->dev->dev)); #endif test_and_set_bit(mISDN_STACK_KILLED, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status); test_and_clear_bit(mISDN_STACK_ABORT, &st->status); skb_queue_purge(&st->msgq); st->thread = NULL; if (st->notify != NULL) { complete(st->notify); st->notify = NULL; } return 0; } static int l1_receive(struct 
mISDNchannel *ch, struct sk_buff *skb) { if (!ch->st) return -ENODEV; __net_timestamp(skb); _queue_message(ch->st, skb); return 0; } void set_channel_address(struct mISDNchannel *ch, u_int sapi, u_int tei) { ch->addr = sapi | (tei << 8); } void __add_layer2(struct mISDNchannel *ch, struct mISDNstack *st) { list_add_tail(&ch->list, &st->layer2); } void add_layer2(struct mISDNchannel *ch, struct mISDNstack *st) { mutex_lock(&st->lmutex); __add_layer2(ch, st); mutex_unlock(&st->lmutex); } static int st_own_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) { if (!ch->st || !ch->st->layer1) return -EINVAL; return ch->st->layer1->ctrl(ch->st->layer1, cmd, arg); } int create_stack(struct mISDNdevice *dev) { struct mISDNstack *newst; int err; DECLARE_COMPLETION_ONSTACK(done); newst = kzalloc(sizeof(struct mISDNstack), GFP_KERNEL); if (!newst) { printk(KERN_ERR "kmalloc mISDN_stack failed\n"); return -ENOMEM; } newst->dev = dev; INIT_LIST_HEAD(&newst->layer2); INIT_HLIST_HEAD(&newst->l1sock.head); rwlock_init(&newst->l1sock.lock); init_waitqueue_head(&newst->workq); skb_queue_head_init(&newst->msgq); mutex_init(&newst->lmutex); dev->D.st = newst; err = create_teimanager(dev); if (err) { printk(KERN_ERR "kmalloc teimanager failed\n"); kfree(newst); return err; } dev->teimgr->peer = &newst->own; dev->teimgr->recv = mISDN_queue_message; dev->teimgr->st = newst; newst->layer1 = &dev->D; dev->D.recv = l1_receive; dev->D.peer = &newst->own; newst->own.st = newst; newst->own.ctrl = st_own_ctrl; newst->own.send = mISDN_queue_message; newst->own.recv = mISDN_queue_message; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: st(%s)\n", __func__, dev_name(&newst->dev->dev)); newst->notify = &done; newst->thread = kthread_run(mISDNStackd, (void *)newst, "mISDN_%s", dev_name(&newst->dev->dev)); if (IS_ERR(newst->thread)) { err = PTR_ERR(newst->thread); printk(KERN_ERR "mISDN:cannot create kernel thread for %s (%d)\n", dev_name(&newst->dev->dev), err); delete_teimanager(dev->teimgr); 
kfree(newst); } else wait_for_completion(&done); return err; } int connect_layer1(struct mISDNdevice *dev, struct mISDNchannel *ch, u_int protocol, struct sockaddr_mISDN *adr) { struct mISDN_sock *msk = container_of(ch, struct mISDN_sock, ch); struct channel_req rq; int err; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", __func__, dev_name(&dev->dev), protocol, adr->dev, adr->channel, adr->sapi, adr->tei); switch (protocol) { case ISDN_P_NT_S0: case ISDN_P_NT_E1: case ISDN_P_TE_S0: case ISDN_P_TE_E1: ch->recv = mISDN_queue_message; ch->peer = &dev->D.st->own; ch->st = dev->D.st; rq.protocol = protocol; rq.adr.channel = adr->channel; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); printk(KERN_DEBUG "%s: ret %d (dev %d)\n", __func__, err, dev->id); if (err) return err; write_lock_bh(&dev->D.st->l1sock.lock); sk_add_node(&msk->sk, &dev->D.st->l1sock.head); write_unlock_bh(&dev->D.st->l1sock.lock); break; default: return -ENOPROTOOPT; } return 0; } int connect_Bstack(struct mISDNdevice *dev, struct mISDNchannel *ch, u_int protocol, struct sockaddr_mISDN *adr) { struct channel_req rq, rq2; int pmask, err; struct Bprotocol *bp; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", __func__, dev_name(&dev->dev), protocol, adr->dev, adr->channel, adr->sapi, adr->tei); ch->st = dev->D.st; pmask = 1 << (protocol & ISDN_P_B_MASK); if (pmask & dev->Bprotocols) { rq.protocol = protocol; rq.adr = *adr; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); if (err) return err; ch->recv = rq.ch->send; ch->peer = rq.ch; rq.ch->recv = ch->send; rq.ch->peer = ch; rq.ch->st = dev->D.st; } else { bp = get_Bprotocol4mask(pmask); if (!bp) return -ENOPROTOOPT; rq2.protocol = protocol; rq2.adr = *adr; rq2.ch = ch; err = bp->create(&rq2); if (err) return err; ch->recv = rq2.ch->send; ch->peer = rq2.ch; rq2.ch->st = dev->D.st; rq.protocol = rq2.protocol; rq.adr = *adr; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); if (err) { 
rq2.ch->ctrl(rq2.ch, CLOSE_CHANNEL, NULL); return err; } rq2.ch->recv = rq.ch->send; rq2.ch->peer = rq.ch; rq.ch->recv = rq2.ch->send; rq.ch->peer = rq2.ch; rq.ch->st = dev->D.st; } ch->protocol = protocol; ch->nr = rq.ch->nr; return 0; } int create_l2entity(struct mISDNdevice *dev, struct mISDNchannel *ch, u_int protocol, struct sockaddr_mISDN *adr) { struct channel_req rq; int err; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", __func__, dev_name(&dev->dev), protocol, adr->dev, adr->channel, adr->sapi, adr->tei); rq.protocol = ISDN_P_TE_S0; if (dev->Dprotocols & (1 << ISDN_P_TE_E1)) rq.protocol = ISDN_P_TE_E1; switch (protocol) { case ISDN_P_LAPD_NT: rq.protocol = ISDN_P_NT_S0; if (dev->Dprotocols & (1 << ISDN_P_NT_E1)) rq.protocol = ISDN_P_NT_E1; fallthrough; case ISDN_P_LAPD_TE: ch->recv = mISDN_queue_message; ch->peer = &dev->D.st->own; ch->st = dev->D.st; rq.adr.channel = 0; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); printk(KERN_DEBUG "%s: ret 1 %d\n", __func__, err); if (err) break; rq.protocol = protocol; rq.adr = *adr; rq.ch = ch; err = dev->teimgr->ctrl(dev->teimgr, OPEN_CHANNEL, &rq); printk(KERN_DEBUG "%s: ret 2 %d\n", __func__, err); if (!err) { if ((protocol == ISDN_P_LAPD_NT) && !rq.ch) break; add_layer2(rq.ch, dev->D.st); rq.ch->recv = mISDN_queue_message; rq.ch->peer = &dev->D.st->own; rq.ch->ctrl(rq.ch, OPEN_CHANNEL, NULL); /* can't fail */ } break; default: err = -EPROTONOSUPPORT; } return err; } void delete_channel(struct mISDNchannel *ch) { struct mISDN_sock *msk = container_of(ch, struct mISDN_sock, ch); struct mISDNchannel *pch; if (!ch->st) { printk(KERN_WARNING "%s: no stack\n", __func__); return; } if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: st(%s) protocol(%x)\n", __func__, dev_name(&ch->st->dev->dev), ch->protocol); if (ch->protocol >= ISDN_P_B_START) { if (ch->peer) { ch->peer->ctrl(ch->peer, CLOSE_CHANNEL, NULL); ch->peer = NULL; } return; } switch (ch->protocol) { case ISDN_P_NT_S0: 
case ISDN_P_TE_S0: case ISDN_P_NT_E1: case ISDN_P_TE_E1: write_lock_bh(&ch->st->l1sock.lock); sk_del_node_init(&msk->sk); write_unlock_bh(&ch->st->l1sock.lock); ch->st->dev->D.ctrl(&ch->st->dev->D, CLOSE_CHANNEL, NULL); break; case ISDN_P_LAPD_TE: pch = get_channel4id(ch->st, ch->nr); if (pch) { mutex_lock(&ch->st->lmutex); list_del(&pch->list); mutex_unlock(&ch->st->lmutex); pch->ctrl(pch, CLOSE_CHANNEL, NULL); pch = ch->st->dev->teimgr; pch->ctrl(pch, CLOSE_CHANNEL, NULL); } else printk(KERN_WARNING "%s: no l2 channel\n", __func__); break; case ISDN_P_LAPD_NT: pch = ch->st->dev->teimgr; if (pch) { pch->ctrl(pch, CLOSE_CHANNEL, NULL); } else printk(KERN_WARNING "%s: no l2 channel\n", __func__); break; default: break; } return; } void delete_stack(struct mISDNdevice *dev) { struct mISDNstack *st = dev->D.st; DECLARE_COMPLETION_ONSTACK(done); if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: st(%s)\n", __func__, dev_name(&st->dev->dev)); if (dev->teimgr) delete_teimanager(dev->teimgr); if (st->thread) { if (st->notify) { printk(KERN_WARNING "%s: notifier in use\n", __func__); complete(st->notify); } st->notify = &done; test_and_set_bit(mISDN_STACK_ABORT, &st->status); test_and_set_bit(mISDN_STACK_WAKEUP, &st->status); wake_up_interruptible(&st->workq); wait_for_completion(&done); } if (!list_empty(&st->layer2)) printk(KERN_WARNING "%s: layer2 list not empty\n", __func__); if (!hlist_empty(&st->l1sock.head)) printk(KERN_WARNING "%s: layer1 list not empty\n", __func__); kfree(st); } void mISDN_initstack(u_int *dp) { debug = dp; } |
1 1 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 | // SPDX-License-Identifier: GPL-2.0 /* * 
Copyright (c) 2000-2006 Silicon Graphics, Inc. * Copyright (c) 2012-2013 Red Hat, Inc. * All rights reserved. */ #include "xfs.h" #include "xfs_shared.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_dir2.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_btree.h" #include "xfs_quota.h" #include "xfs_symlink.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_ialloc.h" #include "xfs_error.h" /* ----- Kernel only functions below ----- */ int xfs_readlink_bmap_ilocked( struct xfs_inode *ip, char *link) { struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS]; struct xfs_buf *bp; xfs_daddr_t d; char *cur_chunk; int pathlen = ip->i_disk_size; int nmaps = XFS_SYMLINK_MAPS; int byte_cnt; int n; int error = 0; int fsblocks = 0; int offset; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); fsblocks = xfs_symlink_blocks(mp, pathlen); error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0); if (error) goto out; offset = 0; for (n = 0; n < nmaps; n++) { d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); error = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, &bp, &xfs_symlink_buf_ops); if (error) return error; byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); if (pathlen < byte_cnt) byte_cnt = pathlen; cur_chunk = bp->b_addr; if (xfs_has_crc(mp)) { if (!xfs_symlink_hdr_ok(ip->i_ino, offset, byte_cnt, bp)) { error = -EFSCORRUPTED; xfs_alert(mp, "symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)", offset, byte_cnt, ip->i_ino); xfs_buf_relse(bp); goto out; } cur_chunk += sizeof(struct xfs_dsymlink_hdr); } memcpy(link + offset, cur_chunk, byte_cnt); pathlen -= byte_cnt; offset += byte_cnt; xfs_buf_relse(bp); } ASSERT(pathlen == 0); link[ip->i_disk_size] = '\0'; error = 0; out: return error; } int 
xfs_readlink( struct xfs_inode *ip, char *link) { struct xfs_mount *mp = ip->i_mount; xfs_fsize_t pathlen; int error = -EFSCORRUPTED; trace_xfs_readlink(ip); if (xfs_is_shutdown(mp)) return -EIO; xfs_ilock(ip, XFS_ILOCK_SHARED); pathlen = ip->i_disk_size; if (!pathlen) goto out; if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) { xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)", __func__, (unsigned long long) ip->i_ino, (long long) pathlen); ASSERT(0); goto out; } if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { /* * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED * if if_data is junk. */ if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_u1.if_data)) goto out; memcpy(link, ip->i_df.if_u1.if_data, pathlen + 1); error = 0; } else { error = xfs_readlink_bmap_ilocked(ip, link); } out: xfs_iunlock(ip, XFS_ILOCK_SHARED); return error; } int xfs_symlink( struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp) { struct xfs_mount *mp = dp->i_mount; struct xfs_trans *tp = NULL; struct xfs_inode *ip = NULL; int error = 0; int pathlen; bool unlock_dp_on_error = false; xfs_fileoff_t first_fsb; xfs_filblks_t fs_blocks; int nmaps; struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS]; xfs_daddr_t d; const char *cur_chunk; int byte_cnt; int n; struct xfs_buf *bp; prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; struct xfs_dquot *pdqp = NULL; uint resblks; xfs_ino_t ino; *ipp = NULL; trace_xfs_symlink(dp, link_name); if (xfs_is_shutdown(mp)) return -EIO; /* * Check component lengths of the target path name. */ pathlen = strlen(target_path); if (pathlen >= XFS_SYMLINK_MAXLEN) /* total string too long */ return -ENAMETOOLONG; ASSERT(pathlen > 0); prid = xfs_get_initial_prid(dp); /* * Make sure that we have allocated dquot(s) on disk. 
*/ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) return error; /* * The symlink will fit into the inode data fork? * There can't be any attributes so we get the whole variable part. */ if (pathlen <= XFS_LITINO(mp)) fs_blocks = 0; else fs_blocks = xfs_symlink_blocks(mp, pathlen); resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); error = xfs_trans_alloc_icreate(mp, &M_RES(mp)->tr_symlink, udqp, gdqp, pdqp, resblks, &tp); if (error) goto out_release_dquots; xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = true; /* * Check whether the directory allows new symlinks or not. */ if (dp->i_diflags & XFS_DIFLAG_NOSYMLINKS) { error = -EPERM; goto out_trans_cancel; } /* * Allocate an inode for the symlink. */ error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); if (!error) error = xfs_init_new_inode(idmap, tp, dp, ino, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, false, &ip); if (error) goto out_trans_cancel; /* * Now we join the directory inode to the transaction. We do not do it * earlier because xfs_dir_ialloc might commit the previous transaction * (and release all the locks). An error from here on will result in * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); unlock_dp_on_error = false; /* * Also attach the dquot(s) to it, if applicable. */ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); resblks -= XFS_IALLOC_SPACE_RES(mp); /* * If the symlink will fit into the inode, write it inline. 
*/ if (pathlen <= xfs_inode_data_fork_size(ip)) { xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen); ip->i_disk_size = pathlen; ip->i_df.if_format = XFS_DINODE_FMT_LOCAL; xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); } else { int offset; first_fsb = 0; nmaps = XFS_SYMLINK_MAPS; error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_METADATA, resblks, mval, &nmaps); if (error) goto out_trans_cancel; resblks -= fs_blocks; ip->i_disk_size = pathlen; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); cur_chunk = target_path; offset = 0; for (n = 0; n < nmaps; n++) { char *buf; d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, &bp); if (error) goto out_trans_cancel; bp->b_ops = &xfs_symlink_buf_ops; byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); byte_cnt = min(byte_cnt, pathlen); buf = bp->b_addr; buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset, byte_cnt, bp); memcpy(buf, cur_chunk, byte_cnt); cur_chunk += byte_cnt; pathlen -= byte_cnt; offset += byte_cnt; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) - (char *)bp->b_addr); } ASSERT(pathlen == 0); } i_size_write(VFS_I(ip), ip->i_disk_size); /* * Create the directory entry for the symlink. */ error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks); if (error) goto out_trans_cancel; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* * If this is a synchronous mount, make sure that the * symlink transaction goes to disk before returning to * the user. 
*/ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); if (error) goto out_release_inode; xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); *ipp = ip; return 0; out_trans_cancel: xfs_trans_cancel(tp); out_release_inode: /* * Wait until after the current transaction is aborted to finish the * setup of the inode and release the inode. This prevents recursive * transactions and deadlocks from xfs_inactive. */ if (ip) { xfs_finish_inode_setup(ip); xfs_irele(ip); } out_release_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; } /* * Free a symlink that has blocks associated with it. * * Note: zero length symlinks are not allowed to exist. When we set the size to * zero, also change it to a regular file so that it does not get written to * disk as a zero length symlink. The inode is on the unlinked list already, so * userspace cannot find this inode anymore, so this change is not user visible * but allows us to catch corrupt zero-length symlinks in the verifiers. */ STATIC int xfs_inactive_symlink_rmt( struct xfs_inode *ip) { struct xfs_buf *bp; int done; int error; int i; xfs_mount_t *mp; xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; int nmaps; int size; xfs_trans_t *tp; mp = ip->i_mount; ASSERT(!xfs_need_iread_extents(&ip->i_df)); /* * We're freeing a symlink that has some * blocks allocated to it. Free the * blocks here. We know that we've got * either 1 or 2 extents and that we can * free them all in one bunmapi call. */ ASSERT(ip->i_df.if_nextents > 0 && ip->i_df.if_nextents <= 2); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); /* * Lock the inode, fix the size, turn it into a regular file and join it * to the transaction. Hold it so in the normal path, we still have it * locked for the second transaction. 
In the error paths we need it * held so the cancel won't rele it, see below. */ size = (int)ip->i_disk_size; ip->i_disk_size = 0; VFS_I(ip)->i_mode = (VFS_I(ip)->i_mode & ~S_IFMT) | S_IFREG; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Find the block(s) so we can inval and unmap them. */ done = 0; nmaps = ARRAY_SIZE(mval); error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), mval, &nmaps, 0); if (error) goto error_trans_cancel; /* * Invalidate the block(s). No validation is done. */ for (i = 0; i < nmaps; i++) { error = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0, &bp); if (error) goto error_trans_cancel; xfs_trans_binval(tp, bp); } /* * Unmap the dead block(s) to the dfops. */ error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &done); if (error) goto error_trans_cancel; ASSERT(done); /* * Commit the transaction. This first logs the EFI and the inode, then * rolls and commits the transaction that frees the extents. */ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_trans_commit(tp); if (error) { ASSERT(xfs_is_shutdown(mp)); goto error_unlock; } /* * Remove the memory for extent descriptions (just bookkeeping). 
*/ if (ip->i_df.if_bytes) xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; error_trans_cancel: xfs_trans_cancel(tp); error_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } /* * xfs_inactive_symlink - free a symlink */ int xfs_inactive_symlink( struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; int pathlen; trace_xfs_inactive_symlink(ip); if (xfs_is_shutdown(mp)) return -EIO; xfs_ilock(ip, XFS_ILOCK_EXCL); pathlen = (int)ip->i_disk_size; ASSERT(pathlen); if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN) { xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", __func__, (unsigned long long)ip->i_ino, pathlen); xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(0); return -EFSCORRUPTED; } /* * Inline fork state gets removed by xfs_difree() so we have nothing to * do here in that case. */ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; } xfs_iunlock(ip, XFS_ILOCK_EXCL); /* remove the remote symlink */ return xfs_inactive_symlink_rmt(ip); } |
4 447 473 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 | /* SPDX-License-Identifier: 
GPL-2.0 */ #ifndef _LINUX_TTY_H #define _LINUX_TTY_H #include <linux/fs.h> #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> #include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/tty_port.h> #include <linux/mutex.h> #include <linux/tty_flags.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) */ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_RESERVE 1024 /* Default reserve for main devpts */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) */ #define __DISABLED_CHAR '\0' #define INTR_CHAR(tty) ((tty)->termios.c_cc[VINTR]) #define QUIT_CHAR(tty) ((tty)->termios.c_cc[VQUIT]) #define ERASE_CHAR(tty) ((tty)->termios.c_cc[VERASE]) #define KILL_CHAR(tty) ((tty)->termios.c_cc[VKILL]) #define EOF_CHAR(tty) ((tty)->termios.c_cc[VEOF]) #define TIME_CHAR(tty) ((tty)->termios.c_cc[VTIME]) #define MIN_CHAR(tty) ((tty)->termios.c_cc[VMIN]) #define SWTC_CHAR(tty) ((tty)->termios.c_cc[VSWTC]) #define START_CHAR(tty) ((tty)->termios.c_cc[VSTART]) #define STOP_CHAR(tty) ((tty)->termios.c_cc[VSTOP]) #define SUSP_CHAR(tty) ((tty)->termios.c_cc[VSUSP]) #define EOL_CHAR(tty) ((tty)->termios.c_cc[VEOL]) #define REPRINT_CHAR(tty) ((tty)->termios.c_cc[VREPRINT]) #define DISCARD_CHAR(tty) ((tty)->termios.c_cc[VDISCARD]) #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE]) #define LNEXT_CHAR(tty) ((tty)->termios.c_cc[VLNEXT]) #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2]) #define _I_FLAG(tty, f) ((tty)->termios.c_iflag & (f)) #define _O_FLAG(tty, f) ((tty)->termios.c_oflag & (f)) #define _C_FLAG(tty, f) ((tty)->termios.c_cflag & (f)) 
#define _L_FLAG(tty, f) ((tty)->termios.c_lflag & (f)) #define I_IGNBRK(tty) _I_FLAG((tty), IGNBRK) #define I_BRKINT(tty) _I_FLAG((tty), BRKINT) #define I_IGNPAR(tty) _I_FLAG((tty), IGNPAR) #define I_PARMRK(tty) _I_FLAG((tty), PARMRK) #define I_INPCK(tty) _I_FLAG((tty), INPCK) #define I_ISTRIP(tty) _I_FLAG((tty), ISTRIP) #define I_INLCR(tty) _I_FLAG((tty), INLCR) #define I_IGNCR(tty) _I_FLAG((tty), IGNCR) #define I_ICRNL(tty) _I_FLAG((tty), ICRNL) #define I_IUCLC(tty) _I_FLAG((tty), IUCLC) #define I_IXON(tty) _I_FLAG((tty), IXON) #define I_IXANY(tty) _I_FLAG((tty), IXANY) #define I_IXOFF(tty) _I_FLAG((tty), IXOFF) #define I_IMAXBEL(tty) _I_FLAG((tty), IMAXBEL) #define I_IUTF8(tty) _I_FLAG((tty), IUTF8) #define O_OPOST(tty) _O_FLAG((tty), OPOST) #define O_OLCUC(tty) _O_FLAG((tty), OLCUC) #define O_ONLCR(tty) _O_FLAG((tty), ONLCR) #define O_OCRNL(tty) _O_FLAG((tty), OCRNL) #define O_ONOCR(tty) _O_FLAG((tty), ONOCR) #define O_ONLRET(tty) _O_FLAG((tty), ONLRET) #define O_OFILL(tty) _O_FLAG((tty), OFILL) #define O_OFDEL(tty) _O_FLAG((tty), OFDEL) #define O_NLDLY(tty) _O_FLAG((tty), NLDLY) #define O_CRDLY(tty) _O_FLAG((tty), CRDLY) #define O_TABDLY(tty) _O_FLAG((tty), TABDLY) #define O_BSDLY(tty) _O_FLAG((tty), BSDLY) #define O_VTDLY(tty) _O_FLAG((tty), VTDLY) #define O_FFDLY(tty) _O_FLAG((tty), FFDLY) #define C_BAUD(tty) _C_FLAG((tty), CBAUD) #define C_CSIZE(tty) _C_FLAG((tty), CSIZE) #define C_CSTOPB(tty) _C_FLAG((tty), CSTOPB) #define C_CREAD(tty) _C_FLAG((tty), CREAD) #define C_PARENB(tty) _C_FLAG((tty), PARENB) #define C_PARODD(tty) _C_FLAG((tty), PARODD) #define C_HUPCL(tty) _C_FLAG((tty), HUPCL) #define C_CLOCAL(tty) _C_FLAG((tty), CLOCAL) #define C_CIBAUD(tty) _C_FLAG((tty), CIBAUD) #define C_CRTSCTS(tty) _C_FLAG((tty), CRTSCTS) #define C_CMSPAR(tty) _C_FLAG((tty), CMSPAR) #define L_ISIG(tty) _L_FLAG((tty), ISIG) #define L_ICANON(tty) _L_FLAG((tty), ICANON) #define L_XCASE(tty) _L_FLAG((tty), XCASE) #define L_ECHO(tty) _L_FLAG((tty), ECHO) #define L_ECHOE(tty) 
_L_FLAG((tty), ECHOE) #define L_ECHOK(tty) _L_FLAG((tty), ECHOK) #define L_ECHONL(tty) _L_FLAG((tty), ECHONL) #define L_NOFLSH(tty) _L_FLAG((tty), NOFLSH) #define L_TOSTOP(tty) _L_FLAG((tty), TOSTOP) #define L_ECHOCTL(tty) _L_FLAG((tty), ECHOCTL) #define L_ECHOPRT(tty) _L_FLAG((tty), ECHOPRT) #define L_ECHOKE(tty) _L_FLAG((tty), ECHOKE) #define L_FLUSHO(tty) _L_FLAG((tty), FLUSHO) #define L_PENDIN(tty) _L_FLAG((tty), PENDIN) #define L_IEXTEN(tty) _L_FLAG((tty), IEXTEN) #define L_EXTPROC(tty) _L_FLAG((tty), EXTPROC) struct device; struct signal_struct; struct tty_operations; /** * struct tty_struct - state associated with a tty while open * * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero * frees the structure * @dev: class device or %NULL (e.g. ptys, serdev) * @driver: &struct tty_driver operating this tty * @ops: &struct tty_operations of @driver for this tty (open, close, etc.) * @index: index of this tty (e.g. to construct @name like tty12) * @ldisc_sem: protects line discipline changes (@ldisc) -- lock tty not pty * @ldisc: the current line discipline for this tty (n_tty by default) * @atomic_write_lock: protects against concurrent writers, i.e. locks * @write_cnt, @write_buf and similar * @legacy_mutex: leftover from history (BKL -> BTM -> @legacy_mutex), * protecting several operations on this tty * @throttle_mutex: protects against concurrent tty_throttle_safe() and * tty_unthrottle_safe() (but not tty_unthrottle()) * @termios_rwsem: protects @termios and @termios_locked * @winsize_mutex: protects @winsize * @termios: termios for the current tty, copied from/to @driver.termios * @termios_locked: locked termios (by %TIOCGLCKTRMIOS and %TIOCSLCKTRMIOS * ioctls) * @name: name of the tty constructed by tty_line_name() (e.g. ttyS3) * @flags: bitwise OR of %TTY_THROTTLED, %TTY_IO_ERROR, ... 
* @count: count of open processes, reaching zero cancels all the work for * this tty and drops a @kref too (but does not free this tty) * @winsize: size of the terminal "window" (cf. @winsize_mutex) * @flow: flow settings grouped together, see also @flow.unused * @flow.lock: lock for @flow members * @flow.stopped: tty stopped/started by stop_tty()/start_tty() * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has * precedence over @flow.stopped) * @flow.unused: alignment for Alpha, so that no members other than @flow.* are * modified by the same 64b word store. The @flow's __aligned is * there for the very same reason. * @ctrl: control settings grouped together, see also @ctrl.unused * @ctrl.lock: lock for @ctrl members * @ctrl.pgrp: process group of this tty (setpgrp(2)) * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both * @ctrl.lock and @legacy_mutex, readers must use at least one of * them. * @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants) * @ctrl.packet: packet mode enabled * @ctrl.unused: alignment for Alpha, see @flow.unused for explanation * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS * handling * @receive_room: bytes permitted to feed to @ldisc without any being lost * @flow_change: controls behavior of throttling, see tty_throttle_safe() and * tty_unthrottle_safe() * @link: link to another pty (master -> slave and vice versa) * @fasync: state for %O_ASYNC (for %SIGIO); managed by fasync_helper() * @write_wait: concurrent writers are waiting in this queue until they are * allowed to write * @read_wait: readers wait for data in this queue * @hangup_work: normally a work to perform a hangup (do_tty_hangup()); while * freeing the tty, (re)used to release_one_tty() * @disc_data: pointer to @ldisc's private data (e.g. to &struct n_tty_data) * @driver_data: pointer to @driver's private data (e.g. 
&struct uart_state) * @files_lock: protects @tty_files list * @tty_files: list of (re)openers of this tty (i.e. linked &struct * tty_file_private) * @closing: when set during close, n_tty processes only START & STOP chars * @write_buf: temporary buffer used during tty_write() to copy user data to * @write_cnt: count of bytes written in tty_write() to @write_buf * @SAK_work: if the tty has a pending do_SAK, it is queued here * @port: persistent storage for this device (i.e. &struct tty_port) * * All of the state associated with a tty while the tty is open. Persistent * storage for tty devices is referenced here as @port and is documented in * &struct tty_port. */ struct tty_struct { struct kref kref; int index; struct device *dev; struct tty_driver *driver; struct tty_port *port; const struct tty_operations *ops; struct tty_ldisc *ldisc; struct ld_semaphore ldisc_sem; struct mutex atomic_write_lock; struct mutex legacy_mutex; struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; struct ktermios termios, termios_locked; char name[64]; unsigned long flags; int count; unsigned int receive_room; struct winsize winsize; struct { spinlock_t lock; bool stopped; bool tco_stopped; unsigned long unused[0]; } __aligned(sizeof(unsigned long)) flow; struct { struct pid *pgrp; struct pid *session; spinlock_t lock; unsigned char pktstatus; bool packet; unsigned long unused[0]; } __aligned(sizeof(unsigned long)) ctrl; bool hw_stopped; bool closing; int flow_change; struct tty_struct *link; struct fasync_struct *fasync; wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; void *disc_data; void *driver_data; spinlock_t files_lock; int write_cnt; unsigned char *write_buf; struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 struct work_struct SAK_work; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { struct tty_struct *tty; struct 
file *file; struct list_head list; }; /** * DOC: TTY Struct Flags * * These bits are used in the :c:member:`tty_struct.flags` field. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. * * TTY_THROTTLED * Driver input is throttled. The ldisc should call * :c:member:`tty_driver.unthrottle()` in order to resume reception when * it is ready to process more data (at threshold min). * * TTY_IO_ERROR * If set, causes all subsequent userspace read/write calls on the tty to * fail, returning -%EIO. (May be no ldisc too.) * * TTY_OTHER_CLOSED * Device is a pty and the other side has closed. * * TTY_EXCLUSIVE * Exclusive open mode (a single opener). * * TTY_DO_WRITE_WAKEUP * If set, causes the driver to call the * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume * transmission when it can accept more data to transmit. * * TTY_LDISC_OPEN * Indicates that a line discipline is open. For debugging purposes only. * * TTY_PTY_LOCK * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. * * TTY_NO_WRITE_SPLIT * Prevent driver from splitting up writes into smaller chunks (preserve * write boundaries to driver). * * TTY_HUPPED * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. * * TTY_HUPPING * The TTY is in the process of hanging up to abort potential readers. * * TTY_LDISC_CHANGING * Line discipline for this TTY is being changed. I/O should not block * when this is set. Use tty_io_nonblock() to check. * * TTY_LDISC_HALTED * Line discipline for this TTY was stopped. No work should be queued to * this ldisc. 
*/ #define TTY_THROTTLED 0 #define TTY_IO_ERROR 1 #define TTY_OTHER_CLOSED 2 #define TTY_EXCLUSIVE 3 #define TTY_DO_WRITE_WAKEUP 5 #define TTY_LDISC_OPEN 11 #define TTY_PTY_LOCK 16 #define TTY_NO_WRITE_SPLIT 17 #define TTY_HUPPED 18 #define TTY_HUPPING 19 #define TTY_LDISC_CHANGING 20 #define TTY_LDISC_HALTED 22 static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || test_bit(TTY_LDISC_CHANGING, &tty->flags); } static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); } static inline bool tty_throttled(struct tty_struct *tty) { return test_bit(TTY_THROTTLED, &tty->flags); } #ifdef CONFIG_TTY void tty_kref_put(struct tty_struct *tty); struct pid *tty_get_pgrp(struct tty_struct *tty); void tty_vhangup_self(void); void disassociate_ctty(int priv); dev_t tty_devnum(struct tty_struct *tty); void proc_clear_tty(struct task_struct *p); struct tty_struct *get_current_tty(void); /* tty_io.c */ int __init tty_init(void); const char *tty_name(const struct tty_struct *tty); struct tty_struct *tty_kopen_exclusive(dev_t device); struct tty_struct *tty_kopen_shared(dev_t device); void tty_kclose(struct tty_struct *tty); int tty_dev_name_to_number(const char *name, dev_t *number); #else static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) { return NULL; } static inline void tty_vhangup_self(void) { } static inline void disassociate_ctty(int priv) { } static inline dev_t tty_devnum(struct tty_struct *tty) { return 0; } static inline void proc_clear_tty(struct task_struct *p) { } static inline struct tty_struct *get_current_tty(void) { return NULL; } /* tty_io.c */ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } static inline struct tty_struct *tty_kopen_exclusive(dev_t device) { return ERR_PTR(-ENODEV); } static inline 
void tty_kclose(struct tty_struct *tty) { } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; int vcs_init(void); extern const struct class tty_class; /** * tty_kref_get - get a tty reference * @tty: tty device * * Returns: a new reference to a tty object. The caller must hold sufficient * locks/counts to ensure that their existing reference cannot go away */ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); return tty; } const char *tty_driver_name(const struct tty_struct *tty); void tty_wait_until_sent(struct tty_struct *tty, long timeout); void stop_tty(struct tty_struct *tty); void start_tty(struct tty_struct *tty); void tty_write_message(struct tty_struct *tty, char *msg); int tty_send_xchar(struct tty_struct *tty, char ch); int tty_put_char(struct tty_struct *tty, unsigned char c); unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); bool tty_throttle_safe(struct tty_struct *tty); bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); int is_current_pgrp_orphaned(void); void tty_hangup(struct tty_struct *tty); void tty_vhangup(struct tty_struct *tty); int tty_hung_up_p(struct file *filp); void do_SAK(struct tty_struct *tty); void __do_SAK(struct tty_struct *tty); void no_tty(void); speed_t tty_termios_baud_rate(const struct ktermios *termios); void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns: the baud rate as an integer for 
this terminal. The termios lock * must be held by the caller and the terminal bit flags may be updated. * * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); void tty_wakeup(struct tty_struct *tty); int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); int tty_perform_flush(struct tty_struct *tty, unsigned long arg); struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); void tty_release_struct(struct tty_struct *tty, int idx); void tty_init_termios(struct tty_struct *tty); void tty_save_termios(struct tty_struct *tty); int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; /* n_tty.c */ void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT void tty_audit_exit(void); void tty_audit_fork(struct signal_struct *sig); int tty_audit_push(void); #else static inline void tty_audit_exit(void) { } static inline void tty_audit_fork(struct signal_struct *sig) { } static inline int tty_audit_push(void) { return 0; } #endif /* tty_ioctl.c */ int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* vt.c */ int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ void tty_lock(struct tty_struct *tty); int tty_lock_interruptible(struct tty_struct *tty); void 
tty_unlock(struct tty_struct *tty); void tty_lock_slave(struct tty_struct *tty); void tty_unlock_slave(struct tty_struct *tty); void tty_set_lock_subclass(struct tty_struct *tty); #endif |
71 15 64 3 8 57 60 1 1 11 63 2 4 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | /* GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 only, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is included * in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see http://www.gnu.org/licenses * * Please visit http://www.xyratex.com/contact if you need additional * information or have any questions. * * GPL HEADER END */ /* * Copyright 2012 Xyratex Technology Limited * * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. 
*/ #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/crc32.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/simd.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 #define PCLMUL_MIN_LEN 64L /* minimum size of buffer * for crc32_pclmul_le_16 */ #define SCALE_F 16L /* size of xmm register */ #define SCALE_F_MASK (SCALE_F - 1) u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); static u32 __attribute__((pure)) crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) { unsigned int iquotient; unsigned int iremainder; unsigned int prealign; if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) return crc32_le(crc, p, len); if ((long)p & SCALE_F_MASK) { /* align p to 16 byte */ prealign = SCALE_F - ((long)p & SCALE_F_MASK); crc = crc32_le(crc, p, prealign); len -= prealign; p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & ~SCALE_F_MASK); } iquotient = len & (~SCALE_F_MASK); iremainder = len & SCALE_F_MASK; kernel_fpu_begin(); crc = crc32_pclmul_le_16(p, iquotient, crc); kernel_fpu_end(); if (iremainder) crc = crc32_le(crc, p + iquotient, iremainder); return crc; } static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) { u32 *key = crypto_tfm_ctx(tfm); *key = 0; return 0; } static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, unsigned int keylen) { u32 *mctx = crypto_shash_ctx(hash); if (keylen != sizeof(u32)) return -EINVAL; *mctx = le32_to_cpup((__le32 *)key); return 0; } static int crc32_pclmul_init(struct shash_desc *desc) { u32 *mctx = crypto_shash_ctx(desc->tfm); u32 *crcp = shash_desc_ctx(desc); *crcp = *mctx; return 0; } static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, unsigned int len) { u32 *crcp = shash_desc_ctx(desc); *crcp = crc32_pclmul_le(*crcp, data, len); return 0; } /* No final XOR 
0xFFFFFFFF, like crc32_le */ static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); return 0; } static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); } static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) { u32 *crcp = shash_desc_ctx(desc); *(__le32 *)out = cpu_to_le32p(crcp); return 0; } static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, out); } static struct shash_alg alg = { .setkey = crc32_pclmul_setkey, .init = crc32_pclmul_init, .update = crc32_pclmul_update, .final = crc32_pclmul_final, .finup = crc32_pclmul_finup, .digest = crc32_pclmul_digest, .descsize = sizeof(u32), .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32", .cra_driver_name = "crc32-pclmul", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, .cra_init = crc32_pclmul_cra_init, } }; static const struct x86_cpu_id crc32pclmul_cpu_id[] = { X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); static int __init crc32_pclmul_mod_init(void) { if (!x86_match_cpu(crc32pclmul_cpu_id)) { pr_info("PCLMULQDQ-NI instructions are not detected.\n"); return -ENODEV; } return crypto_register_shash(&alg); } static void __exit crc32_pclmul_mod_fini(void) { crypto_unregister_shash(&alg); } module_init(crc32_pclmul_mod_init); module_exit(crc32_pclmul_mod_fini); MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crc32"); MODULE_ALIAS_CRYPTO("crc32-pclmul"); |
3 3 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | /* * linux/fs/nls/nls_cp1250.c * * Charset cp1250 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. 
*/ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x20ac, 0x0000, 0x201a, 0x0000, 0x201e, 0x2026, 0x2020, 0x2021, 0x0000, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179, /* 0x90*/ 0x0000, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x0000, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a, /* 0xa0*/ 0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b, /* 0xb0*/ 0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c, /* 0xc0*/ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, /* 0xd0*/ 0x0110, 0x0143, 0x0147, 
0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, /* 0xe0*/ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, /* 0xf0*/ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0x00, 0x00, 0x00, 0xa4, 0x00, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0xae, 0x00, /* 0xa8-0xaf */ 0xb0, 0xb1, 0x00, 0x00, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0x00, 0x00, 0xbb, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0xc1, 0xc2, 0x00, 0xc4, 0x00, 
0x00, 0xc7, /* 0xc0-0xc7 */ 0x00, 0xc9, 0x00, 0xcb, 0x00, 0xcd, 0xce, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0x00, 0x00, 0xda, 0x00, 0xdc, 0xdd, 0x00, 0xdf, /* 0xd8-0xdf */ 0x00, 0xe1, 0xe2, 0x00, 0xe4, 0x00, 0x00, 0xe7, /* 0xe0-0xe7 */ 0x00, 0xe9, 0x00, 0xeb, 0x00, 0xed, 0xee, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0x00, 0x00, 0xfa, 0x00, 0xfc, 0xfd, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0xc3, 0xe3, 0xa5, 0xb9, 0xc6, 0xe6, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xc8, 0xe8, 0xcf, 0xef, /* 0x08-0x0f */ 0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xca, 0xea, 0xcc, 0xec, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0xc5, 0xe5, 0x00, 0x00, 0xbc, 0xbe, 0x00, /* 0x38-0x3f */ 0x00, 0xa3, 0xb3, 0xd1, 0xf1, 0x00, 0x00, 0xd2, /* 0x40-0x47 */ 0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xd5, 0xf5, 0x00, 0x00, 0xc0, 0xe0, 0x00, 0x00, /* 0x50-0x57 */ 0xd8, 0xf8, 0x8c, 0x9c, 0x00, 0x00, 0xaa, 0xba, /* 0x58-0x5f */ 0x8a, 0x9a, 0xde, 0xfe, 0x8d, 0x9d, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd9, 0xf9, /* 0x68-0x6f */ 0xdb, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x8f, 0x9f, 0xaf, 0xbf, 0x8e, 0x9e, 0x00, /* 0x78-0x7f */ }; static const unsigned char page02[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa1, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0xa2, 0xff, 0x00, 0xb2, 0x00, 0xbd, 0x00, 0x00, /* 0xd8-0xdf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 
0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, page02, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, page21, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 
0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x00, 0x82, 0x00, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x00, 0x89, 0x9a, 0x8b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x88-0x8f */ 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x00, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xb3, 0xa4, 0xb9, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xba, 0xab, 0xac, 0xad, 0xae, 0xbf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbe, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xc0-0xc7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xc8-0xcf */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* 0xd0-0xd7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 
*/ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x00, 0x82, 0x00, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x00, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x00, 0x99, 0x8a, 0x9b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xa3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xa5, 0xaa, 0xbb, 0xbc, 0xbd, 0xbc, 0xaf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x00, /* 0xd8-0xdf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xe0-0xe7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xe8-0xef */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, /* 0xf0-0xf7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if 
(uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp1250", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp1250(void) { return register_nls(&table); } static void __exit exit_nls_cp1250(void) { unregister_nls(&table); } module_init(init_nls_cp1250) module_exit(exit_nls_cp1250) MODULE_LICENSE("Dual BSD/GPL"); |
44 38 44 4 17 21 10 2 2 2 43 32 39 43 44 44 1 23 13 44 44 16 5 5 5 3 21 43 44 43 41 13 44 4 39 9 29 3 3 38 21 15 2 10 9 4 1 4 3 3 2 3 1 3 3 20 16 15 20 3 3 1 3 1 1 1 1 1 1 5 5 1 5 2 2 13 3 2 19 13 1 20 3 4 5 2 19 13 20 13 16 13 20 1 2 2 1 2 1 1 2 2 2 1 2 2 2 2 2 2 2 3 2 3 3 1 3 3 2 2 2 2 9 2 9 10 17 17 9 15 10 9 2 9 12 17 2 15 6 1 6 6 6 6 6 2 6 7 6 6 2 6 7 6 2 7 7 7 5 5 5 17 17 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 
1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 
1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 
2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 | // SPDX-License-Identifier: GPL-2.0+ /* * NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Koji Sato. 
*/ #include <linux/slab.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/pagevec.h> #include "nilfs.h" #include "page.h" #include "btnode.h" #include "btree.h" #include "alloc.h" #include "dat.h" static void __nilfs_btree_init(struct nilfs_bmap *bmap); static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { struct nilfs_btree_path *path; int level = NILFS_BTREE_LEVEL_DATA; path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); if (path == NULL) goto out; for (; level < NILFS_BTREE_LEVEL_MAX; level++) { path[level].bp_bh = NULL; path[level].bp_sib_bh = NULL; path[level].bp_index = 0; path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; path[level].bp_op = NULL; } out: return path; } static void nilfs_btree_free_path(struct nilfs_btree_path *path) { int level = NILFS_BTREE_LEVEL_DATA; for (; level < NILFS_BTREE_LEVEL_MAX; level++) brelse(path[level].bp_bh); kmem_cache_free(nilfs_btree_path_cache, path); } /* * B-tree node operations */ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); if (!bh) return -ENOMEM; set_buffer_nilfs_volatile(bh); *bhp = bh; return 0; } static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) { return node->bn_flags; } static void nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) { node->bn_flags = flags; } static int nilfs_btree_node_root(const struct nilfs_btree_node *node) { return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; } static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) { return node->bn_level; } static void nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) { node->bn_level = level; } static int 
nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) { return le16_to_cpu(node->bn_nchildren); } static void nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) { node->bn_nchildren = cpu_to_le16(nchildren); } static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { return i_blocksize(btree->b_inode); } static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) { return btree->b_nchildren_per_block; } static __le64 * nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) { return (__le64 *)((char *)(node + 1) + (nilfs_btree_node_root(node) ? 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); } static __le64 * nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) { return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); } static __u64 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) { return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); } static void nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) { *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); } static __u64 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, int ncmax) { return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); } static void nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, int ncmax) { *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); } static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, int level, int nchildren, int ncmax, const __u64 *keys, const __u64 *ptrs) { __le64 *dkeys; __le64 *dptrs; int i; nilfs_btree_node_set_flags(node, flags); nilfs_btree_node_set_level(node, level); nilfs_btree_node_set_nchildren(node, nchildren); dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nchildren; i++) { dkeys[i] = cpu_to_le64(keys[i]); dptrs[i] = cpu_to_le64(ptrs[i]); } } /* Assume the buffer heads 
corresponding to left and right are locked. */ static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, struct nilfs_btree_node *right, int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); lnchildren += n; rnchildren -= n; nilfs_btree_node_set_nchildren(left, lnchildren); nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer heads corresponding to left and right are locked. */ static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, struct nilfs_btree_node *right, int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); lnchildren -= n; rnchildren += n; nilfs_btree_node_set_nchildren(left, lnchildren); nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer head corresponding to node is locked. 
*/ static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, __u64 key, __u64 ptr, int ncmax) { __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); nchildren = nilfs_btree_node_get_nchildren(node); if (index < nchildren) { memmove(dkeys + index + 1, dkeys + index, (nchildren - index) * sizeof(*dkeys)); memmove(dptrs + index + 1, dptrs + index, (nchildren - index) * sizeof(*dptrs)); } dkeys[index] = cpu_to_le64(key); dptrs[index] = cpu_to_le64(ptr); nchildren++; nilfs_btree_node_set_nchildren(node, nchildren); } /* Assume that the buffer head corresponding to node is locked. */ static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, __u64 *keyp, __u64 *ptrp, int ncmax) { __u64 key; __u64 ptr; __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); key = le64_to_cpu(dkeys[index]); ptr = le64_to_cpu(dptrs[index]); nchildren = nilfs_btree_node_get_nchildren(node); if (keyp != NULL) *keyp = key; if (ptrp != NULL) *ptrp = ptr; if (index < nchildren - 1) { memmove(dkeys + index, dkeys + index + 1, (nchildren - index - 1) * sizeof(*dkeys)); memmove(dptrs + index, dptrs + index + 1, (nchildren - index - 1) * sizeof(*dptrs)); } nchildren--; nilfs_btree_node_set_nchildren(node, nchildren); } static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, __u64 key, int *indexp) { __u64 nkey; int index, low, high, s; /* binary search */ low = 0; high = nilfs_btree_node_get_nchildren(node) - 1; index = 0; s = 0; while (low <= high) { index = (low + high) / 2; nkey = nilfs_btree_node_get_key(node, index); if (nkey == key) { s = 0; goto out; } else if (nkey < key) { low = index + 1; s = -1; } else { high = index - 1; s = 1; } } /* adjust index */ if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { if (s > 0 && index > 0) index--; } else if (s < 0) index++; out: *indexp 
= index; return s == 0; } /** * nilfs_btree_node_broken - verify consistency of btree node * @node: btree node block to be examined * @size: node size (in bytes) * @inode: host inode of btree * @blocknr: block number * * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. */ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, size_t size, struct inode *inode, sector_t blocknr) { int level, flags, nchildren; int ret = 0; level = nilfs_btree_node_get_level(node); flags = nilfs_btree_node_get_flags(node); nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || nchildren < 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, (unsigned long long)blocknr, level, flags, nchildren); ret = 1; } return ret; } /** * nilfs_btree_root_broken - verify consistency of btree root node * @node: btree root node to be examined * @inode: host inode of btree * * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 
*/ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, struct inode *inode) { int level, flags, nchildren; int ret = 0; level = nilfs_btree_node_get_level(node); flags = nilfs_btree_node_get_flags(node); nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); ret = 1; } return ret; } int nilfs_btree_broken_node_block(struct buffer_head *bh) { struct inode *inode; int ret; if (buffer_nilfs_checked(bh)) return 0; inode = bh->b_folio->mapping->host; ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, bh->b_size, inode, bh->b_blocknr); if (likely(!ret)) set_buffer_nilfs_checked(bh); return ret; } static struct nilfs_btree_node * nilfs_btree_get_root(const struct nilfs_bmap *btree) { return (struct nilfs_btree_node *)btree->b_u.u_data; } static struct nilfs_btree_node * nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_bh->b_data; } static struct nilfs_btree_node * nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; } static int nilfs_btree_height(const struct nilfs_bmap *btree) { return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; } static struct nilfs_btree_node * nilfs_btree_get_node(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, int level, int *ncmaxp) { struct nilfs_btree_node *node; if (level == nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_root(btree); *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; } else { node = nilfs_btree_get_nonroot_node(path, level); *ncmaxp = nilfs_btree_nchildren_per_block(btree); } return node; } static int 
nilfs_btree_bad_node(const struct nilfs_bmap *btree, struct nilfs_btree_node *node, int level) { if (unlikely(nilfs_btree_node_get_level(node) != level)) { dump_stack(); nilfs_crit(btree->b_inode->i_sb, "btree level mismatch (ino=%lu): %d != %d", btree->b_inode->i_ino, nilfs_btree_node_get_level(node), level); return 1; } return 0; } struct nilfs_btree_readahead_info { struct nilfs_btree_node *node; /* parent node */ int max_ra_blocks; /* max nof blocks to read ahead */ int index; /* current index on the parent node */ int ncmax; /* nof children in the parent node */ }; static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { if (likely(ret == -EEXIST)) goto out_check; if (ret == -ENOENT) { /* * Block address translation failed due to invalid * value of 'ptr'. In this case, return internal code * -EINVAL (broken bmap) to notify bmap layer of fatal * metadata corruption. 
*/ ret = -EINVAL; } return ret; } if (ra) { int i, n; __u64 ptr2; /* read ahead sibling nodes */ for (n = ra->max_ra_blocks, i = ra->index + 1; n > 0 && i < ra->ncmax; n--, i++) { ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); ret = nilfs_btnode_submit_block(btnc, ptr2, 0, REQ_OP_READ | REQ_RAHEAD, &ra_bh, &submit_ptr); if (likely(!ret || ret == -EEXIST)) brelse(ra_bh); else if (ret != -EBUSY) break; if (!buffer_locked(bh)) goto out_no_wait; } } wait_on_buffer(bh); out_no_wait: if (!buffer_uptodate(bh)) { nilfs_err(btree->b_inode->i_sb, "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", btree->b_inode->i_ino, (unsigned long long)ptr); brelse(bh); return -EIO; } out_check: if (nilfs_btree_broken_node_block(bh)) { clear_buffer_uptodate(bh); brelse(bh); return -EINVAL; } *bhp = bh; return 0; } static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { return __nilfs_btree_get_block(btree, ptr, bhp, NULL); } static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 key, __u64 *ptrp, int minlevel, int readahead) { struct nilfs_btree_node *node; struct nilfs_btree_readahead_info p, *ra; __u64 ptr; int level, index, found, ncmax, ret; node = nilfs_btree_get_root(btree); level = nilfs_btree_node_get_level(node); if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) return -ENOENT; found = nilfs_btree_node_lookup(node, key, &index); ptr = nilfs_btree_node_get_ptr(node, index, NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; ncmax = nilfs_btree_nchildren_per_block(btree); while (--level >= minlevel) { ra = NULL; if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); p.index = index; p.max_ra_blocks = 7; ra = &p; } ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, ra); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, 
level); if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; if (!found) found = nilfs_btree_node_lookup(node, key, &index); else index = 0; if (index < ncmax) { ptr = nilfs_btree_node_get_ptr(node, index, ncmax); } else { WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; } path[level].bp_index = index; } if (!found) return -ENOENT; if (ptrp != NULL) *ptrp = ptr; return 0; } static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; __u64 ptr; int index, level, ncmax, ret; node = nilfs_btree_get_root(btree); index = nilfs_btree_node_get_nchildren(node) - 1; if (index < 0) return -ENOENT; level = nilfs_btree_node_get_level(node); ptr = nilfs_btree_node_get_ptr(node, index, NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; ncmax = nilfs_btree_nchildren_per_block(btree); for (level--; level > 0; level--) { ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; ptr = nilfs_btree_node_get_ptr(node, index, ncmax); path[level].bp_index = index; } if (keyp != NULL) *keyp = nilfs_btree_node_get_key(node, index); if (ptrp != NULL) *ptrp = ptr; return 0; } /** * nilfs_btree_get_next_key - get next valid key from btree path array * @btree: bmap struct of btree * @path: array of nilfs_btree_path struct * @minlevel: start level * @nextkey: place to store the next valid key * * Return Value: If a next key was found, 0 is returned. Otherwise, * -ENOENT is returned. 
*/ static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, int minlevel, __u64 *nextkey) { struct nilfs_btree_node *node; int maxlevel = nilfs_btree_height(btree) - 1; int index, next_adj, level; /* Next index is already set to bp_index for leaf nodes. */ next_adj = 0; for (level = minlevel; level <= maxlevel; level++) { if (level == maxlevel) node = nilfs_btree_get_root(btree); else node = nilfs_btree_get_nonroot_node(path, level); index = path[level].bp_index + next_adj; if (index < nilfs_btree_node_get_nchildren(node)) { /* Next key is in this node */ *nextkey = nilfs_btree_node_get_key(node, index); return 0; } /* For non-leaf nodes, next index is stored at bp_index + 1. */ next_adj = 1; } return -ENOENT; } static int nilfs_btree_lookup(const struct nilfs_bmap *btree, __u64 key, int level, __u64 *ptrp) { struct nilfs_btree_path *path; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, __u64 key, __u64 *ptrp, unsigned int maxblocks) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; struct inode *dat = NULL; __u64 ptr, ptr2; sector_t blocknr; int level = NILFS_BTREE_LEVEL_NODE_MIN; int ret, cnt, index, maxlevel, ncmax; struct nilfs_btree_readahead_info p; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); if (ret < 0) goto out; if (NILFS_BMAP_USE_VBN(btree)) { dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) goto out; ptr = blocknr; } cnt = 1; if (cnt == maxblocks) goto end; maxlevel = nilfs_btree_height(btree) - 1; node = nilfs_btree_get_node(btree, path, level, &ncmax); index = path[level].bp_index + 1; for (;;) { while (index < nilfs_btree_node_get_nchildren(node)) { if 
(nilfs_btree_node_get_key(node, index) != key + cnt) goto end; ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) goto out; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) goto end; index++; } if (level == maxlevel) break; /* look-up right sibling node */ p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); p.index = path[level + 1].bp_index + 1; p.max_ra_blocks = 7; if (p.index >= nilfs_btree_node_get_nchildren(p.node) || nilfs_btree_node_get_key(p.node, p.index) != key + cnt) break; ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); path[level + 1].bp_index = p.index; brelse(path[level].bp_bh); path[level].bp_bh = NULL; ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, &p); if (ret < 0) goto out; node = nilfs_btree_get_nonroot_node(path, level); ncmax = nilfs_btree_nchildren_per_block(btree); index = 0; path[level].bp_index = index; } end: *ptrp = ptr; ret = cnt; out: nilfs_btree_free_path(path); return ret; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 key) { if (level < nilfs_btree_height(btree) - 1) { do { nilfs_btree_node_set_key( nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } /* root */ if (level == nilfs_btree_height(btree) - 1) { nilfs_btree_node_set_key(nilfs_btree_get_root(btree), path[level].bp_index, key); } } static void nilfs_btree_do_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, 
ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, NILFS_BTREE_ROOT_NCHILDREN_MAX); } } static void nilfs_btree_carry_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + lnchildren + 1) / 2 - lnchildren; if (n > path[level].bp_index) { /* move insert point */ n--; move = 1; } nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); if (move) { brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += lnchildren; path[level + 1].bp_index--; } else { brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index -= n; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } static void nilfs_btree_carry_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); ncblk = 
nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + rnchildren + 1) / 2 - rnchildren; if (n > nchildren - path[level].bp_index) { /* move insert point */ n--; move = 1; } nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; if (move) { brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index -= nilfs_btree_node_get_nchildren(node); path[level + 1].bp_index++; } else { brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } static void nilfs_btree_split(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + 1) / 2; if (n > nchildren - path[level].bp_index) { n--; move = 1; } nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); nilfs_btree_node_insert(right, path[level].bp_index, *keyp, *ptrp, ncblk); *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; } else { nilfs_btree_do_insert(btree, path, level, keyp, ptrp); *keyp = 
nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } path[level + 1].bp_index++; } static void nilfs_btree_grow(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; int n, ncblk; root = nilfs_btree_get_root(btree); child = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(root); nilfs_btree_node_move_right(root, child, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; nilfs_btree_do_insert(btree, path, level, keyp, ptrp); *keyp = nilfs_btree_node_get_key(child, 0); *ptrp = path[level].bp_newreq.bpr_ptr; } static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path) { struct nilfs_btree_node *node; int level, ncmax; if (path == NULL) return NILFS_BMAP_INVALID_PTR; /* left sibling */ level = NILFS_BTREE_LEVEL_NODE_MIN; if (path[level].bp_index > 0) { node = nilfs_btree_get_node(btree, path, level, &ncmax); return nilfs_btree_node_get_ptr(node, path[level].bp_index - 1, ncmax); } /* parent */ level = NILFS_BTREE_LEVEL_NODE_MIN + 1; if (level <= nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_node(btree, path, level, &ncmax); return nilfs_btree_node_get_ptr(node, path[level].bp_index, ncmax); } return NILFS_BMAP_INVALID_PTR; } static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, __u64 key) { __u64 ptr; ptr = nilfs_bmap_find_target_seq(btree, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; ptr = nilfs_btree_find_near(btree, path); if (ptr != NILFS_BMAP_INVALID_PTR) /* near */ return ptr; /* block group */ return 
nilfs_bmap_find_target_in_group(btree);
}

/*
 * nilfs_btree_prepare_insert - plan an insertion without modifying the tree.
 *
 * Reserves a pointer for the data block, then walks up from the lowest node
 * level and records in path[level].bp_op the operation each level will need
 * (plain insert, carry to a sibling, split, or root growth), stopping at the
 * first level that can absorb the entry.  The highest level involved is
 * stored in *@levelp and the block count in @stats->bs_nblocks.
 *
 * Returns the last helper result (negative on failure).  On failure the
 * reservations made so far are aborted and @stats->bs_nblocks is reset to 0.
 */
static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
				      struct nilfs_btree_path *path,
				      int *levelp, __u64 key, __u64 ptr,
				      struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, level, ncmax, ncblk, ret;
	struct inode *dat = NULL;

	stats->bs_nblocks = 0;
	level = NILFS_BTREE_LEVEL_DATA;

	/* allocate a new ptr for data block */
	if (NILFS_BMAP_USE_VBN(btree)) {
		path[level].bp_newreq.bpr_ptr =
			nilfs_btree_find_target_v(btree, path, key);
		dat = nilfs_bmap_get_dat(btree);
	}

	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
	if (ret < 0)
		goto err_out_data;

	ncblk = nilfs_btree_nchildren_per_block(btree);

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		node = nilfs_btree_get_nonroot_node(path, level);
		/* room left in this node: a plain insert ends the walk */
		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
			path[level].bp_op = nilfs_btree_do_insert;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
		pindex = path[level + 1].bp_index;

		/* left sibling */
		if (pindex > 0) {
			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
							  ncmax);
			ret = nilfs_btree_get_block(btree, sibptr, &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
				/* keep the sibling buffer for the commit phase */
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_left;
				stats->bs_nblocks++;
				goto out;
			} else {
				brelse(bh);
			}
		}

		/* right sibling */
		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
							  ncmax);
			ret = nilfs_btree_get_block(btree, sibptr, &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
				/* keep the sibling buffer for the commit phase */
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_right;
				stats->bs_nblocks++;
				goto out;
			} else {
				brelse(bh);
			}
		}

		/* split */
		/* request the pointer following the one reserved one level below */
		path[level].bp_newreq.bpr_ptr =
			path[level - 1].bp_newreq.bpr_ptr + 1;
		ret = nilfs_bmap_prepare_alloc_ptr(btree,
						   &path[level].bp_newreq, dat);
		if (ret < 0)
			goto err_out_child_node;
		ret = nilfs_btree_get_new_block(btree,
						path[level].bp_newreq.bpr_ptr,
						&bh);
		if (ret < 0)
			goto err_out_curr_node;

		stats->bs_nblocks++;

		sib = (struct nilfs_btree_node *)bh->b_data;
		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
		path[level].bp_sib_bh = bh;
		path[level].bp_op = nilfs_btree_split;
	}

	/* root */
	node = nilfs_btree_get_root(btree);
	if (nilfs_btree_node_get_nchildren(node) <
	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
		path[level].bp_op = nilfs_btree_do_insert;
		stats->bs_nblocks++;
		goto out;
	}

	/* grow */
	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
	if (ret < 0)
		goto err_out_child_node;
	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
					&bh);
	if (ret < 0)
		goto err_out_curr_node;

	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
			      0, level, 0, ncblk, NULL, NULL);
	path[level].bp_sib_bh = bh;
	path[level].bp_op = nilfs_btree_grow;

	/* the level above the grown one only needs a plain insert */
	level++;
	path[level].bp_op = nilfs_btree_do_insert;

	/* a newly-created node block and a data block are added */
	stats->bs_nblocks += 2;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error */
 err_out_curr_node:
	/* undo the pointer reservation made for the current level */
	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
 err_out_child_node:
	/* undo every lower node level: drop its new block and reservation */
	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
		nilfs_btnode_delete(path[level].bp_sib_bh);
		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);

	}

	/* level is now NILFS_BTREE_LEVEL_DATA: undo the data block reservation */
	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
 err_out_data:
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}

/*
 * nilfs_btree_commit_insert - apply the plan built by
 * nilfs_btree_prepare_insert().
 *
 * @ptr is first interpreted as a buffer head address and that buffer is
 * flagged with set_buffer_nilfs_volatile(); it is then replaced by the
 * pointer reserved for the data block.  For each level up to @maxlevel the
 * reservation of the level below is committed and the recorded bp_op is run.
 * Finally the bmap is marked dirty if it was not already.
 */
static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
				      struct nilfs_btree_path *path,
				      int maxlevel, __u64 key, __u64 ptr)
{
	struct inode *dat = NULL;
	int level;

	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
	ptr =
path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
	if (NILFS_BMAP_USE_VBN(btree)) {
		nilfs_bmap_set_target_v(btree, key, ptr);
		dat = nilfs_bmap_get_dat(btree);
	}

	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
		/* commit the reservation of the level below, then run this level's op */
		nilfs_bmap_commit_alloc_ptr(btree,
					    &path[level - 1].bp_newreq, dat);
		path[level].bp_op(btree, path, level, &key, &ptr);
	}

	if (!nilfs_bmap_dirty(btree))
		nilfs_bmap_set_dirty(btree);
}

/*
 * nilfs_btree_insert - insert the pair (@key, @ptr) into the B-tree.
 *
 * Returns -ENOMEM if no path can be allocated, -EEXIST if the lookup finds
 * @key already present, any other lookup error unchanged, or the negative
 * result of nilfs_btree_prepare_insert().  On success the insertion is
 * committed, the inode's block count is increased by the number of blocks
 * reported by the prepare step, and the prepare step's result is returned.
 */
static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
{
	struct nilfs_btree_path *path;
	struct nilfs_bmap_stats stats;
	int level, ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
	/* only -ENOENT (key absent) lets the insertion proceed */
	if (ret != -ENOENT) {
		if (ret == 0)
			ret = -EEXIST;
		goto out;
	}

	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
	if (ret < 0)
		goto out;
	nilfs_btree_commit_insert(btree, path, level, key, ptr);
	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);

 out:
	nilfs_btree_free_path(path);
	return ret;
}

/*
 * nilfs_btree_do_delete - remove the entry at path[level].bp_index from the
 * node at @level.
 *
 * For a non-root level the node buffer is marked dirty, and when the removed
 * entry was at index 0 the node's new first key is passed to
 * nilfs_btree_promote_key() for level + 1.  For the root level only the entry
 * is removed.  @keyp and @ptrp are forwarded to nilfs_btree_node_delete().
 */
static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	int ncblk;

	if (level < nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_nonroot_node(path, level);
		ncblk = nilfs_btree_nchildren_per_block(btree);
		nilfs_btree_node_delete(node, path[level].bp_index,
					keyp, ptrp, ncblk);
		if (!buffer_dirty(path[level].bp_bh))
			mark_buffer_dirty(path[level].bp_bh);
		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(node, 0));
	} else {
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_delete(node, path[level].bp_index,
					keyp, ptrp,
					NILFS_BTREE_ROOT_NCHILDREN_MAX);
	}
}

/*
 * nilfs_btree_borrow_left - bp_op callback: delete the entry at @level, then
 * take entries from the left sibling so that both nodes end up with about
 * half of their combined children.
 */
static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int nchildren, lnchildren, n, ncblk;
	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	node = nilfs_btree_get_nonroot_node(path, level);
	left = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	lnchildren = nilfs_btree_node_get_nchildren(left);
	ncblk = nilfs_btree_nchildren_per_block(btree);

	/* number of entries to take so this node holds half of the total */
	n = (nchildren + lnchildren) / 2 - nchildren;

	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	/* this node's first key changed; hand the new one to the level above */
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(node, 0));

	brelse(path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
	path[level].bp_index += n;
}

/*
 * nilfs_btree_borrow_right - bp_op callback: delete the entry at @level, then
 * take entries from the right sibling so that both nodes end up with about
 * half of their combined children.
 *
 * The parent index is advanced temporarily so that nilfs_btree_promote_key()
 * is called for the right sibling's slot with the sibling's new first key,
 * and is restored afterwards.
 */
static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
				     struct nilfs_btree_path *path,
				     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, rnchildren, n, ncblk;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	node = nilfs_btree_get_nonroot_node(path, level);
	right = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	rnchildren = nilfs_btree_node_get_nchildren(right);
	ncblk = nilfs_btree_nchildren_per_block(btree);

	/* number of entries to take so this node holds half of the total */
	n = (nchildren + rnchildren) / 2 - nchildren;

	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(right, 0));
	path[level + 1].bp_index--;

	brelse(path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
}

/*
 * nilfs_btree_concat_left - bp_op callback: delete the entry at @level, then
 * merge the remaining entries of this node into its left sibling.
 *
 * The emptied node block is released with nilfs_btnode_delete() and the
 * sibling buffer takes its place in the path.
 */
static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int n, ncblk;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	node = nilfs_btree_get_nonroot_node(path,
level);
	left = nilfs_btree_get_sib_node(path, level);
	ncblk = nilfs_btree_nchildren_per_block(btree);

	n = nilfs_btree_node_get_nchildren(node);

	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	/* the emptied node goes away; the sibling replaces it in the path */
	nilfs_btnode_delete(path[level].bp_bh);
	path[level].bp_bh = path[level].bp_sib_bh;
	path[level].bp_sib_bh = NULL;
	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
}

/*
 * nilfs_btree_concat_right - bp_op callback: delete the entry at @level, then
 * merge all entries of the right sibling into this node.
 *
 * The sibling block is released with nilfs_btnode_delete() and the parent
 * index is advanced by one, so that the next level's operation acts on the
 * slot after the current one (the sibling's slot, per the comment in
 * nilfs_btree_prepare_delete()).
 */
static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
				     struct nilfs_btree_path *path,
				     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int n, ncblk;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	node = nilfs_btree_get_nonroot_node(path, level);
	right = nilfs_btree_get_sib_node(path, level);
	ncblk = nilfs_btree_nchildren_per_block(btree);

	n = nilfs_btree_node_get_nchildren(right);

	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);

	nilfs_btnode_delete(path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
	path[level + 1].bp_index++;
}

/*
 * nilfs_btree_shrink - bp_op callback: delete the entry at @level, then fold
 * the only child node back into the root.
 *
 * The root's entry 0 is removed, the root's level is set to @level, the
 * child's n entries are handed to the root with nilfs_btree_node_move_left(),
 * and the child block is released.
 */
static void nilfs_btree_shrink(struct nilfs_bmap *btree,
			       struct nilfs_btree_path *path,
			       int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *root, *child;
	int n, ncblk;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	root = nilfs_btree_get_root(btree);
	child = nilfs_btree_get_nonroot_node(path, level);
	ncblk = nilfs_btree_nchildren_per_block(btree);

	nilfs_btree_node_delete(root, 0, NULL, NULL,
				NILFS_BTREE_ROOT_NCHILDREN_MAX);
	nilfs_btree_node_set_level(root, level);
	n = nilfs_btree_node_get_nchildren(child);
	nilfs_btree_node_move_left(root, child, n,
				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);

	nilfs_btnode_delete(path[level].bp_bh);
	path[level].bp_bh = NULL;
}

/*
 * nilfs_btree_nop - bp_op callback that does nothing; assigned to the root
 * level by nilfs_btree_prepare_delete() when the tree is going to shrink.
 */
static void nilfs_btree_nop(struct nilfs_bmap *btree,
			    struct nilfs_btree_path *path,
			    int level, __u64 *keyp, __u64 *ptrp)
{
}

/*
 * nilfs_btree_prepare_delete - plan a deletion without modifying the tree.
 *
 * Records, per level, the operation to run at commit time in
 * path[level].bp_op and prepares the release of the pointer being removed at
 * each level.  The highest level involved is stored in *levelp and the block
 * count in stats->bs_nblocks.  Returns 0 or a negative error code; on error
 * the preparations made so far are aborted.
 */
static int nilfs_btree_prepare_delete(struct nilfs_bmap
*btree,
				      struct nilfs_btree_path *path,
				      int *levelp,
				      struct nilfs_bmap_stats *stats,
				      struct inode *dat)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;

	ret = 0;
	stats->bs_nblocks = 0;
	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
	ncblk = nilfs_btree_nchildren_per_block(btree);

	/* dindex: index, within the current level's node, of the pointer to end */
	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		node = nilfs_btree_get_nonroot_node(path, level);
		path[level].bp_oldreq.bpr_ptr =
			nilfs_btree_node_get_ptr(node, dindex, ncblk);
		ret = nilfs_bmap_prepare_end_ptr(btree,
						 &path[level].bp_oldreq, dat);
		if (ret < 0)
			goto err_out_child_node;

		/* node stays above the minimum: a plain delete ends the walk */
		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
			path[level].bp_op = nilfs_btree_do_delete;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
		pindex = path[level + 1].bp_index;
		dindex = pindex;

		if (pindex > 0) {
			/* left sibling */
			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
							  ncmax);
			ret = nilfs_btree_get_block(btree, sibptr, &bh);
			if (ret < 0)
				goto err_out_curr_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_borrow_left;
				stats->bs_nblocks++;
				goto out;
			} else {
				/* sibling cannot lend: merge, and keep walking up */
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_concat_left;
				stats->bs_nblocks++;
				/* continue; */
			}
		} else if (pindex <
			   nilfs_btree_node_get_nchildren(parent) - 1) {
			/* right sibling */
			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
							  ncmax);
			ret = nilfs_btree_get_block(btree, sibptr, &bh);
			if (ret < 0)
				goto err_out_curr_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_borrow_right;
				stats->bs_nblocks++;
				goto out;
			} else {
				/* sibling cannot lend: merge, and keep walking up */
				path[level].bp_sib_bh = bh;
				path[level].bp_op =
nilfs_btree_concat_right;
				stats->bs_nblocks++;
				/*
				 * When merging right sibling node
				 * into the current node, pointer to
				 * the right sibling node must be
				 * terminated instead.  The adjustment
				 * below is required for that.
				 */
				dindex = pindex + 1;
				/* continue; */
			}
		} else {
			/* no siblings */
			/* the only child of the root node */
			WARN_ON(level != nilfs_btree_height(btree) - 2);
			/* shrink only if the remaining entries fit in the root */
			if (nilfs_btree_node_get_nchildren(node) - 1 <=
			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
				path[level].bp_op = nilfs_btree_shrink;
				stats->bs_nblocks += 2;
				level++;
				path[level].bp_op = nilfs_btree_nop;
				goto shrink_root_child;
			} else {
				path[level].bp_op = nilfs_btree_do_delete;
				stats->bs_nblocks++;
				goto out;
			}
		}
	}

	/* child of the root node is deleted */
	path[level].bp_op = nilfs_btree_do_delete;
	stats->bs_nblocks++;

shrink_root_child:
	node = nilfs_btree_get_root(btree);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(node, dindex,
					 NILFS_BTREE_ROOT_NCHILDREN_MAX);

	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
	if (ret < 0)
		goto err_out_child_node;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error */
 err_out_curr_node:
	/* undo the end-pointer preparation of the current level */
	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
 err_out_child_node:
	/* undo every lower level: release its sibling buffer and preparation */
	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
		brelse(path[level].bp_sib_bh);
		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
	}
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}

/*
 * nilfs_btree_commit_delete - apply the plan built by
 * nilfs_btree_prepare_delete().
 *
 * For each level from the lowest node level up to @maxlevel, commits the
 * prepared end-pointer request and runs the recorded bp_op with NULL key and
 * pointer outputs.  Marks the bmap dirty if it was not already.
 */
static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
				      struct nilfs_btree_path *path,
				      int maxlevel, struct inode *dat)
{
	int level;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
		path[level].bp_op(btree, path, level, NULL, NULL);
	}

	if (!nilfs_bmap_dirty(btree))
		nilfs_bmap_set_dirty(btree);
}

/*
 * nilfs_btree_delete - delete the entry for @key from the B-tree.
 *
 * Returns -ENOMEM if no path can be allocated, a negative error from the
 * lookup or from the prepare step, and otherwise the prepare step's result
 * after committing the deletion and subtracting the reported number of blocks
 * from the inode's block count.
 */
static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)

{
	struct nilfs_btree_path *path;
	struct nilfs_bmap_stats stats;
	struct inode *dat;
	int level, ret;

	path =
nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret < 0) goto out; dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); if (ret < 0) goto out; nilfs_btree_commit_delete(btree, path, level, dat); nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, __u64 *keyp) { struct nilfs_btree_path *path; const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; int ret; path = nilfs_btree_alloc_path(); if (!path) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); if (!ret) *keyp = start; else if (ret == -ENOENT) ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) { struct nilfs_btree_path *path; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) { struct buffer_head *bh; struct nilfs_btree_node *root, *node; __u64 maxkey, nextmaxkey; __u64 ptr; int nchildren, ret; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; break; default: return 0; } nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? 
nilfs_btree_node_get_key(node, nchildren - 2) : 0;
	brelse(bh);

	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
}

/*
 * nilfs_btree_gather_data - copy up to @nitems key/pointer pairs out of the
 * tree's single data-level node.
 *
 * The node is the root for a tree of height 2, or the block referenced by
 * the root's last pointer for a tree of height 3 (a warning is raised if the
 * root then has more than one child).  Any other height yields -EINVAL.
 * Keys and pointers are converted from little endian into @keys and @ptrs.
 *
 * Returns the number of pairs copied, or a negative error code.
 */
static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
				   __u64 *keys, __u64 *ptrs, int nitems)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *root;
	__le64 *dkeys;
	__le64 *dptrs;
	__u64 ptr;
	int nchildren, ncmax, i, ret;

	root = nilfs_btree_get_root(btree);
	switch (nilfs_btree_height(btree)) {
	case 2:
		bh = NULL;
		node = root;
		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
		break;
	case 3:
		nchildren = nilfs_btree_node_get_nchildren(root);
		WARN_ON(nchildren > 1);
		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
		ret = nilfs_btree_get_block(btree, ptr, &bh);
		if (ret < 0)
			return ret;
		node = (struct nilfs_btree_node *)bh->b_data;
		ncmax = nilfs_btree_nchildren_per_block(btree);
		break;
	default:
		node = NULL;
		return -EINVAL;
	}

	/* never copy more entries than the node holds */
	nchildren = nilfs_btree_node_get_nchildren(node);
	if (nchildren < nitems)
		nitems = nchildren;
	dkeys = nilfs_btree_node_dkeys(node);
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
	for (i = 0; i < nitems; i++) {
		keys[i] = le64_to_cpu(dkeys[i]);
		ptrs[i] = le64_to_cpu(dptrs[i]);
	}

	/* bh is NULL in the height-2 case */
	brelse(bh);

	return nitems;
}

/*
 * nilfs_btree_prepare_convert_and_insert - reserve what
 * nilfs_btree_convert_and_insert() needs before anything is modified.
 *
 * Attaches the B-tree node cache to the inode and reserves a pointer for the
 * data block through @dreq.  When @nreq is not NULL, a second pointer and a
 * new node block are reserved as well and the block's buffer is returned in
 * *@bhp; otherwise *@bhp is set to NULL.  @stats->bs_nblocks counts the
 * blocks reserved.
 *
 * Returns 0 on success or a negative error code; reservations made before a
 * failure in the @nreq step are aborted.
 */
static int
nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
				       union nilfs_bmap_ptr_req *dreq,
				       union nilfs_bmap_ptr_req *nreq,
				       struct buffer_head **bhp,
				       struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct inode *dat = NULL;
	int ret;

	stats->bs_nblocks = 0;

	/* for data */
	/* cannot find near ptr */
	if (NILFS_BMAP_USE_VBN(btree)) {
		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
		dat = nilfs_bmap_get_dat(btree);
	}

	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
	if (ret < 0)
		return ret;

	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
	if (ret < 0)
		return ret;

	*bhp = NULL;
	stats->bs_nblocks++;
	if (nreq != NULL) {
		/* request the pointer following the data block's one */
		nreq->bpr_ptr = dreq->bpr_ptr + 1;
		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
		if (ret < 0)
			goto
err_out_dreq;

		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
		if (ret < 0)
			goto err_out_nreq;

		*bhp = bh;
		stats->bs_nblocks++;
	}

	/* success */
	return 0;

	/* error */
 err_out_nreq:
	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
 err_out_dreq:
	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
	stats->bs_nblocks = 0;
	return ret;

}

/*
 * nilfs_btree_commit_convert_and_insert - turn the bmap into a B-tree that
 * holds the @n entries of @keys/@ptrs plus the new entry for @key.
 *
 * Calls the current mapping's bop_clear hook (if any), flags the buffer whose
 * address is passed in @ptr with set_buffer_nilfs_volatile(), and installs
 * the B-tree operations with __nilfs_btree_init().
 *
 * With @nreq set, a level-1 child node is built in @bh from the entries and
 * the root becomes a level-2 node with one child; @bh is released afterwards.
 * Without @nreq, the root itself becomes the level-1 node holding all
 * entries.  In both cases the new entry is stored with the pointer reserved
 * in @dreq and the bmap is marked dirty.
 */
static void
nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
				      __u64 key, __u64 ptr,
				      const __u64 *keys, const __u64 *ptrs,
				      int n,
				      union nilfs_bmap_ptr_req *dreq,
				      union nilfs_bmap_ptr_req *nreq,
				      struct buffer_head *bh)
{
	struct nilfs_btree_node *node;
	struct inode *dat;
	__u64 tmpptr;
	int ncblk;

	/* free resources */
	if (btree->b_ops->bop_clear != NULL)
		btree->b_ops->bop_clear(btree);

	/* ptr must be a pointer to a buffer head. */
	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));

	/* convert and insert */
	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
	__nilfs_btree_init(btree);
	if (nreq != NULL) {
		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);

		/* create child node at level 1 */
		node = (struct nilfs_btree_node *)bh->b_data;
		ncblk = nilfs_btree_nchildren_per_block(btree);
		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
		if (!buffer_dirty(bh))
			mark_buffer_dirty(bh);
		if (!nilfs_bmap_dirty(btree))
			nilfs_bmap_set_dirty(btree);

		brelse(bh);

		/* create root node at level 2 */
		node = nilfs_btree_get_root(btree);
		tmpptr = nreq->bpr_ptr;
		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
				      &keys[0], &tmpptr);
	} else {
		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);

		/* create root node at level 1 */
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
				      keys, ptrs);
		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
					NILFS_BTREE_ROOT_NCHILDREN_MAX);
		if (!nilfs_bmap_dirty(btree))
			nilfs_bmap_set_dirty(btree);
	}

	if (NILFS_BMAP_USE_VBN(btree))
		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
}

/**
 * nilfs_btree_convert_and_insert - convert a bmap to a B-tree and insert an entry
 * @btree: bmap to be converted
 * @key: key of the new entry
 * @ptr: value associated with @key; the commit step treats it as the address
 *       of a buffer head
 * @keys: keys of the existing entries to carry over
 * @ptrs: pointers of the existing entries to carry over
 * @n: number of entries in @keys and @ptrs
 *
 * If the @n existing entries plus the new one fit in the root node, the root
 * alone holds them.  If they fit in one node block, a single child node is
 * created below the root.  Any larger @n triggers BUG().
 *
 * Return: 0 on success, or the negative error code returned by the prepare
 * step.
 */
int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
				   __u64 key, __u64 ptr,
				   const __u64 *keys, const __u64 *ptrs, int n)
{
	struct buffer_head *bh = NULL;
	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
	struct nilfs_bmap_stats stats;
	int ret;

	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
		/* everything fits in the root: no node block request needed */
		di = &dreq;
		ni = NULL;
	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
			   nilfs_btree_node_size(btree))) {
		di = &dreq;
		ni = &nreq;
	} else {
		di = NULL;
		ni = NULL;
		BUG();
	}

	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
						     &stats);
	if (ret < 0)
		return ret;
	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
					      di, ni, bh);
	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
	return 0;
}

/*
 * nilfs_btree_propagate_p - mark ancestor node buffers dirty.
 *
 * Starting one level above @level, marks each non-root node buffer on @path
 * dirty and stops at the first one that is already dirty.  @bh is not used.
 * Always returns 0.
 */
static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
				   struct nilfs_btree_path *path,
				   int level,
				   struct buffer_head *bh)
{
	while ((++level < nilfs_btree_height(btree) - 1) &&
	       !buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);

	return 0;
}

/*
 * nilfs_btree_prepare_update_v - prepare replacing the pointer that the
 * parent holds for the block at @level.
 *
 * Reads the current pointer from the parent node into bp_oldreq, requests
 * the next pointer value in bp_newreq and calls nilfs_dat_prepare_update().
 * If the block is a B-tree node buffer, a key change in the node cache is
 * prepared as well; should that fail, the DAT update is aborted.
 *
 * Returns 0 on success or a negative error code.
 */
static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
					struct nilfs_btree_path *path,
					int level, struct inode *dat)
{
	struct nilfs_btree_node *parent;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
					 ncmax);
	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
				       &path[level].bp_newreq.bpr_req);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(path[level].bp_bh)) {
		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
		path[level].bp_ctxt.bh = path[level].bp_bh;
		ret = nilfs_btnode_prepare_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		if (ret < 0) {
			/* roll back the DAT update prepared above */
			nilfs_dat_abort_update(dat,
					       &path[level].bp_oldreq.bpr_req,
					       &path[level].bp_newreq.bpr_req);
			return ret;
		}
	}

	return 0;
}

/*
 * nilfs_btree_commit_update_v - finish what nilfs_btree_prepare_update_v()
 * prepared for @level.
 *
 * Commits the DAT update, commits the node cache key change for node buffers
 * (adopting the buffer returned in bp_ctxt.bh), flags the buffer with
 * set_buffer_nilfs_volatile() and stores the new pointer in the parent node.
 */
static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
					struct nilfs_btree_path *path,
					int level, struct inode *dat)
{
	struct nilfs_btree_node *parent;
	int ncmax;

	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
				&path[level].bp_newreq.bpr_req,
				btree->b_ptr_type == NILFS_BMAP_PTR_VS);

	if (buffer_nilfs_node(path[level].bp_bh)) {
		nilfs_btnode_commit_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		path[level].bp_bh = path[level].bp_ctxt.bh;
	}
	set_buffer_nilfs_volatile(path[level].bp_bh);

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
				 path[level].bp_newreq.bpr_ptr, ncmax);
}

/*
 * nilfs_btree_abort_update_v - cancel what nilfs_btree_prepare_update_v()
 * prepared for @level: the DAT update and, for node buffers, the node cache
 * key change.
 */
static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
				       struct nilfs_btree_path *path,
				       int level, struct inode *dat)
{
	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
			       &path[level].bp_newreq.bpr_req);
	if (buffer_nilfs_node(path[level].bp_bh))
		nilfs_btnode_abort_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
}

/*
 * nilfs_btree_prepare_propagate_v - prepare pointer updates from @minlevel
 * upwards.
 *
 * The block at @minlevel is prepared only if its buffer does not carry the
 * nilfs_volatile flag.  Each non-root ancestor is then prepared in turn until
 * one is found that is already dirty.  On success the highest prepared level
 * is stored in *@maxlevelp and 0 is returned.  On failure the preparations
 * made so far are aborted and the negative error code is returned.
 */
static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
					   struct nilfs_btree_path *path,
					   int minlevel, int *maxlevelp,
					   struct inode *dat)
{
	int level, ret;

	level = minlevel;
	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
		if (ret < 0)
			return ret;
	}
	while ((++level < nilfs_btree_height(btree) - 1) &&
	       !buffer_dirty(path[level].bp_bh)) {

		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
		if (ret < 0)
			goto out;
	}

	/* success */
	*maxlevelp = level - 1;
	return 0;

	/* error */
 out:
	/* abort the levels strictly above minlevel first */
	while (--level > minlevel)
		nilfs_btree_abort_update_v(btree, path, level, dat);
	if
(!buffer_nilfs_volatile(path[level].bp_bh)) nilfs_btree_abort_update_v(btree, path, level, dat); return ret; } static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int maxlevel, struct buffer_head *bh, struct inode *dat) { int level; if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) nilfs_btree_commit_update_v(btree, path, minlevel, dat); for (level = minlevel + 1; level <= maxlevel; level++) nilfs_btree_commit_update_v(btree, path, level, dat); } static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { int maxlevel = 0, ret; struct nilfs_btree_node *parent; struct inode *dat = nilfs_bmap_get_dat(btree); __u64 ptr; int ncmax; get_bh(bh); path[level].bp_bh = bh; ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, dat); if (ret < 0) goto out; if (buffer_nilfs_volatile(path[level].bp_bh)) { parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); ret = nilfs_dat_mark_dirty(dat, ptr); if (ret < 0) goto out; } nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); out: brelse(path[level].bp_bh); path[level].bp_bh = NULL; return ret; } static int nilfs_btree_propagate(struct nilfs_bmap *btree, struct buffer_head *bh) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; WARN_ON(!buffer_dirty(bh)); path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; if (buffer_nilfs_node(bh)) { node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { key = nilfs_bmap_data_get_key(btree, bh); level = NILFS_BTREE_LEVEL_DATA; } ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree 
(ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); goto out; } ret = NILFS_BMAP_USE_VBN(btree) ? nilfs_btree_propagate_v(btree, path, level, bh) : nilfs_btree_propagate_p(btree, path, level, bh); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, struct buffer_head *bh) { return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); } static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, struct list_head *lists, struct buffer_head *bh) { struct list_head *head; struct buffer_head *cbh; struct nilfs_btree_node *node, *cnode; __u64 key, ckey; int level; get_bh(bh); node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); if (level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX) { dump_stack(); nilfs_warn(btree->b_inode->i_sb, "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", level, (unsigned long long)key, btree->b_inode->i_ino, (unsigned long long)bh->b_blocknr); return; } list_for_each(head, &lists[level]) { cbh = list_entry(head, struct buffer_head, b_assoc_buffers); cnode = (struct nilfs_btree_node *)cbh->b_data; ckey = nilfs_btree_node_get_key(cnode, 0); if (key < ckey) break; } list_add_tail(&bh->b_assoc_buffers, head); } static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct folio_batch fbatch; struct buffer_head *bh, *head; pgoff_t index = 0; int level, i; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) INIT_LIST_HEAD(&lists[level]); folio_batch_init(&fbatch); while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { 
			bh = head = folio_buffers(fbatch.folios[i]);
			/* walk the circular list of buffers attached to the folio */
			do {
				if (buffer_dirty(bh))
					nilfs_btree_add_dirty_buffer(btree,
								     lists, bh);
			} while ((bh = bh->b_this_page) != head);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		list_splice_tail(&lists[level], listp);
}

/*
 * nilfs_btree_assign_p - assign disk block @blocknr to the block *@bh when
 * the bmap does not use virtual block numbers.
 *
 * For a node buffer the node cache key is changed from the pointer currently
 * held by the parent to @blocknr, and *@bh is replaced by the buffer returned
 * in bp_ctxt.bh.  The parent's pointer is then set to @blocknr and @binfo is
 * filled with the little-endian key, the level and zeroed padding.
 *
 * Returns 0 on success or the negative error code from preparing the key
 * change.
 */
static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	if (buffer_nilfs_node(*bh)) {
		path[level].bp_ctxt.oldkey = ptr;
		path[level].bp_ctxt.newkey = blocknr;
		path[level].bp_ctxt.bh = *bh;
		ret = nilfs_btnode_prepare_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		if (ret < 0)
			return ret;
		nilfs_btnode_commit_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		*bh = path[level].bp_ctxt.bh;
	}

	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
				 ncmax);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
	binfo->bi_dat.bi_level = level;
	memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));

	return 0;
}

/*
 * nilfs_btree_assign_v - assign disk block @blocknr to the block *@bh when
 * the bmap uses virtual block numbers.
 *
 * The pointer held by the parent is used as the DAT request: it is started
 * with nilfs_dat_prepare_start()/nilfs_dat_commit_start() for @blocknr, and
 * @binfo receives that pointer and the key in little endian.
 *
 * Returns 0 on success or the negative error code from
 * nilfs_dat_prepare_start().
 */
static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	struct inode *dat = nilfs_bmap_get_dat(btree);
	__u64 key;
	__u64 ptr;
	union nilfs_bmap_ptr_req req;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	req.bpr_ptr = ptr;
	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
	if (ret < 0)
		return ret;
	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

/*
 * nilfs_btree_assign - bop_assign handler of the B-tree.
 *
 * Derives the key and level of the block *@bh (from the node header for node
 * buffers, from nilfs_bmap_data_get_key() for data buffers), looks up its
 * position one level above, and dispatches to the virtual or physical variant
 * depending on NILFS_BMAP_USE_VBN().  A warning is raised when the lookup
 * reports -ENOENT.
 *
 * Returns 0 on success, -ENOMEM if no path can be allocated, or the negative
 * error code of the lookup or of the assign variant.
 */
static int nilfs_btree_assign(struct nilfs_bmap *btree,
			      struct buffer_head **bh,
			      sector_t blocknr,
			      union nilfs_binfo *binfo)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
		level = nilfs_btree_node_get_level(node);
	} else {
		key = nilfs_bmap_data_get_key(btree, *bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	ret = NILFS_BMAP_USE_VBN(btree) ?
		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);

 out:
	nilfs_btree_free_path(path);

	return ret;
}

/*
 * nilfs_btree_assign_gc - bop_assign handler of the GC operation table.
 *
 * Calls nilfs_dat_move() with the buffer's current b_blocknr and @blocknr,
 * then fills @binfo with that b_blocknr and the block's key in little endian.
 *
 * Returns 0 on success or the negative error code from nilfs_dat_move().
 */
static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *node;
	__u64 key;
	int ret;

	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
			     blocknr);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
	} else
		key = nilfs_bmap_data_get_key(btree, *bh);

	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

/*
 * nilfs_btree_mark - bop_mark handler: mark the block found for @key at
 * @level dirty.
 *
 * Looks up the pointer for @key one level above @level, reads that block,
 * marks its buffer dirty and marks the bmap dirty.  A warning is raised when
 * the lookup or the block read reports -ENOENT.
 *
 * Returns 0 on success, -ENOMEM if no path can be allocated, or the negative
 * error code of the failing step.
 */
static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
{
	struct buffer_head *bh;
	struct nilfs_btree_path *path;
	__u64 ptr;
	int ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret ==
-ENOENT); goto out; } ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } if (!buffer_dirty(bh)) mark_buffer_dirty(bh); brelse(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); out: nilfs_btree_free_path(path); return ret; } static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_lookup = nilfs_btree_lookup, .bop_lookup_contig = nilfs_btree_lookup_contig, .bop_insert = nilfs_btree_insert, .bop_delete = nilfs_btree_delete, .bop_clear = NULL, .bop_propagate = nilfs_btree_propagate, .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, .bop_assign = nilfs_btree_assign, .bop_mark = nilfs_btree_mark, .bop_seek_key = nilfs_btree_seek_key, .bop_last_key = nilfs_btree_last_key, .bop_check_insert = NULL, .bop_check_delete = nilfs_btree_check_delete, .bop_gather_data = nilfs_btree_gather_data, }; static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_lookup = NULL, .bop_lookup_contig = NULL, .bop_insert = NULL, .bop_delete = NULL, .bop_clear = NULL, .bop_propagate = nilfs_btree_propagate_gc, .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, .bop_assign = nilfs_btree_assign_gc, .bop_mark = NULL, .bop_seek_key = NULL, .bop_last_key = NULL, .bop_check_insert = NULL, .bop_check_delete = NULL, .bop_gather_data = NULL, }; static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } int nilfs_btree_init(struct nilfs_bmap *bmap) { int ret = 0; __nilfs_btree_init(bmap); if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; else ret = nilfs_attach_btree_node_cache( &NILFS_BMAP_I(bmap)->vfs_inode); return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops_gc; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } |
52 49 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | // SPDX-License-Identifier: GPL-2.0-or-later /* Null security operations. * * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <net/af_rxrpc.h> #include "ar-internal.h" static int none_init_connection_security(struct rxrpc_connection *conn, struct rxrpc_key_token *token) { return 0; } /* * Work out how much data we can put in an unsecured packet. */ static int none_how_much_data(struct rxrpc_call *call, size_t remain, size_t *_buf_size, size_t *_data_size, size_t *_offset) { *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); *_offset = 0; return 0; } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { return 0; } static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); sp->flags |= RXRPC_RX_VERIFIED; return 0; } static void none_free_call_crypto(struct rxrpc_call *call) { } static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb) { return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, rxrpc_eproto_rxnull_challenge); } static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb) { return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, rxrpc_eproto_rxnull_response); } static void none_clear(struct rxrpc_connection *conn) { } static int none_init(void) { return 0; } static void none_exit(void) { } /* * RxRPC Kerberos-based security */ const struct rxrpc_security rxrpc_no_security = { .name = "none", .security_index = RXRPC_SECURITY_NONE, .init = none_init, .exit = none_exit, .init_connection_security = none_init_connection_security, .free_call_crypto = 
none_free_call_crypto, .how_much_data = none_how_much_data, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, }; |
53 8 23 23 1 25 23 3 23 14 9 1 1 7 7 275 275 7 1996 1957 2 23 3 1 22 1 1 1 2174 2156 611 1 14 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 | // SPDX-License-Identifier: GPL-2.0 /* * This contains functions for filename crypto management * * Copyright (C) 2015, Google, Inc. * Copyright (C) 2015, Motorola Mobility * * Written by Uday Savagaonkar, 2014. * Modified by Jaegeuk Kim, 2015. * * This has not yet undergone a rigorous security audit. */ #include <linux/namei.h> #include <linux/scatterlist.h> #include <crypto/hash.h> #include <crypto/sha2.h> #include <crypto/skcipher.h> #include "fscrypt_private.h" /* * The minimum message length (input and output length), in bytes, for all * filenames encryption modes. Filenames shorter than this will be zero-padded * before being encrypted. */ #define FSCRYPT_FNAME_MIN_MSG_LEN 16 /* * struct fscrypt_nokey_name - identifier for directory entry when key is absent * * When userspace lists an encrypted directory without access to the key, the * filesystem must present a unique "no-key name" for each filename that allows * it to find the directory entry again if requested. Naively, that would just * mean using the ciphertext filenames. However, since the ciphertext filenames * can contain illegal characters ('\0' and '/'), they must be encoded in some * way. We use base64url. But that can cause names to exceed NAME_MAX (255 * bytes), so we also need to use a strong hash to abbreviate long names. * * The filesystem may also need another kind of hash, the "dirhash", to quickly * find the directory entry. 
Since filesystems normally compute the dirhash * over the on-disk filename (i.e. the ciphertext), it's not computable from * no-key names that abbreviate the ciphertext using the strong hash to fit in * NAME_MAX. It's also not computable if it's a keyed hash taken over the * plaintext (but it may still be available in the on-disk directory entry); * casefolded directories use this type of dirhash. At least in these cases, * each no-key name must include the name's dirhash too. * * To meet all these requirements, we base64url-encode the following * variable-length structure. It contains the dirhash, or 0's if the filesystem * didn't provide one; up to 149 bytes of the ciphertext name; and for * ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes. * * This ensures that each no-key name contains everything needed to find the * directory entry again, contains only legal characters, doesn't exceed * NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only * take the performance hit of SHA-256 on very long filenames (which are rare). */ struct fscrypt_nokey_name { u32 dirhash[2]; u8 bytes[149]; u8 sha256[SHA256_DIGEST_SIZE]; }; /* 189 bytes => 252 bytes base64url-encoded, which is <= NAME_MAX (255) */ /* * Decoded size of max-size no-key name, i.e. a name that was abbreviated using * the strong hash and thus includes the 'sha256' field. This isn't simply * sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included. */ #define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256) /* Encoded size of max-size no-key name */ #define FSCRYPT_NOKEY_NAME_MAX_ENCODED \ FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX) static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') return true; if (str->len == 2 && str->name[0] == '.' 
&& str->name[1] == '.') return true; return false; } /** * fscrypt_fname_encrypt() - encrypt a filename * @inode: inode of the parent directory (for regular filenames) * or of the symlink (for symlink targets). Key must already be * set up. * @iname: the filename to encrypt * @out: (output) the encrypted filename * @olen: size of the encrypted filename. It must be at least @iname->len. * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); const struct fscrypt_inode_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_enc_key.tfm; union fscrypt_iv iv; struct scatterlist sg; int res; /* * Copy the filename to the output buffer for encrypting in-place and * pad it with the needed number of NUL bytes. */ if (WARN_ON_ONCE(olen < iname->len)) return -ENOBUFS; memcpy(out, iname->name, iname->len); memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ fscrypt_generate_iv(&iv, 0, ci); /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) return -ENOMEM; skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); sg_init_one(&sg, out, olen); skcipher_request_set_crypt(req, &sg, &sg, olen, &iv); /* Do the encryption */ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res < 0) { fscrypt_err(inode, "Filename encryption failed: %d", res); return res; } return 0; } EXPORT_SYMBOL_GPL(fscrypt_fname_encrypt); /** * fname_decrypt() - decrypt a filename * @inode: inode of the parent directory (for regular filenames) * or of the symlink (for symlink targets) * @iname: the encrypted filename to decrypt * @oname: (output) the decrypted filename. 
The caller must have allocated * enough space for this, e.g. using fscrypt_fname_alloc_buffer(). * * Return: 0 on success, -errno on failure */ static int fname_decrypt(const struct inode *inode, const struct fscrypt_str *iname, struct fscrypt_str *oname) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; const struct fscrypt_inode_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_enc_key.tfm; union fscrypt_iv iv; int res; /* Allocate request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) return -ENOMEM; skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); /* Initialize IV */ fscrypt_generate_iv(&iv, 0, ci); /* Create decryption request */ sg_init_one(&src_sg, iname->name, iname->len); sg_init_one(&dst_sg, oname->name, oname->len); skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, &iv); res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); skcipher_request_free(req); if (res < 0) { fscrypt_err(inode, "Filename decryption failed: %d", res); return res; } oname->len = strnlen(oname->name, iname->len); return 0; } static const char base64url_table[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; #define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) /** * fscrypt_base64url_encode() - base64url-encode some binary data * @src: the binary data to encode * @srclen: the length of @src in bytes * @dst: (output) the base64url-encoded string. Not NUL-terminated. * * Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL * and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used, * as it's unneeded and not required by the RFC. base64url is used instead of * base64 to avoid the '/' character, which isn't allowed in filenames. * * Return: the length of the resulting base64url-encoded string in bytes. 
* This will be equal to FSCRYPT_BASE64URL_CHARS(srclen). */ static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst) { u32 ac = 0; int bits = 0; int i; char *cp = dst; for (i = 0; i < srclen; i++) { ac = (ac << 8) | src[i]; bits += 8; do { bits -= 6; *cp++ = base64url_table[(ac >> bits) & 0x3f]; } while (bits >= 6); } if (bits) *cp++ = base64url_table[(ac << (6 - bits)) & 0x3f]; return cp - dst; } /** * fscrypt_base64url_decode() - base64url-decode a string * @src: the string to decode. Doesn't need to be NUL-terminated. * @srclen: the length of @src in bytes * @dst: (output) the decoded binary data * * Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with * URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't * accepted, nor are non-encoding characters such as whitespace. * * This implementation hasn't been optimized for performance. * * Return: the length of the resulting decoded binary data in bytes, * or -1 if the string isn't a valid base64url string. 
*/ static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst) { u32 ac = 0; int bits = 0; int i; u8 *bp = dst; for (i = 0; i < srclen; i++) { const char *p = strchr(base64url_table, src[i]); if (p == NULL || src[i] == 0) return -1; ac = (ac << 6) | (p - base64url_table); bits += 6; if (bits >= 8) { bits -= 8; *bp++ = (u8)(ac >> bits); } } if (ac & ((1 << bits) - 1)) return -1; return bp - dst; } bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, u32 orig_len, u32 max_len, u32 *encrypted_len_ret) { int padding = 4 << (fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAGS_PAD_MASK); u32 encrypted_len; if (orig_len > max_len) return false; encrypted_len = max_t(u32, orig_len, FSCRYPT_FNAME_MIN_MSG_LEN); encrypted_len = round_up(encrypted_len, padding); *encrypted_len_ret = min(encrypted_len, max_len); return true; } /** * fscrypt_fname_encrypted_size() - calculate length of encrypted filename * @inode: parent inode of dentry name being encrypted. Key must * already be set up. * @orig_len: length of the original filename * @max_len: maximum length to return * @encrypted_len_ret: where calculated length should be returned (on success) * * Filenames that are shorter than the maximum length may have their lengths * increased slightly by encryption, due to padding that is applied. * * Return: false if the orig_len is greater than max_len. Otherwise, true and * fill out encrypted_len_ret with the length (up to max_len). 
*/ bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret) { return __fscrypt_fname_encrypted_size(&inode->i_crypt_info->ci_policy, orig_len, max_len, encrypted_len_ret); } EXPORT_SYMBOL_GPL(fscrypt_fname_encrypted_size); /** * fscrypt_fname_alloc_buffer() - allocate a buffer for presented filenames * @max_encrypted_len: maximum length of encrypted filenames the buffer will be * used to present * @crypto_str: (output) buffer to allocate * * Allocate a buffer that is large enough to hold any decrypted or encoded * filename (null-terminated), for the given maximum encrypted filename length. * * Return: 0 on success, -errno on failure */ int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str) { u32 max_presented_len = max_t(u32, FSCRYPT_NOKEY_NAME_MAX_ENCODED, max_encrypted_len); crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS); if (!crypto_str->name) return -ENOMEM; crypto_str->len = max_presented_len; return 0; } EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); /** * fscrypt_fname_free_buffer() - free a buffer for presented filenames * @crypto_str: the buffer to free * * Free a buffer that was allocated by fscrypt_fname_alloc_buffer(). */ void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { if (!crypto_str) return; kfree(crypto_str->name); crypto_str->name = NULL; } EXPORT_SYMBOL(fscrypt_fname_free_buffer); /** * fscrypt_fname_disk_to_usr() - convert an encrypted filename to * user-presentable form * @inode: inode of the parent directory (for regular filenames) * or of the symlink (for symlink targets) * @hash: first part of the name's dirhash, if applicable. This only needs to * be provided if the filename is located in an indexed directory whose * encryption key may be unavailable. Not needed for symlink targets. * @minor_hash: second part of the name's dirhash, if applicable * @iname: encrypted filename to convert. May also be "." 
or "..", which * aren't actually encrypted. * @oname: output buffer for the user-presentable filename. The caller must * have allocated enough space for this, e.g. using * fscrypt_fname_alloc_buffer(). * * If the key is available, we'll decrypt the disk name. Otherwise, we'll * encode it for presentation in fscrypt_nokey_name format. * See struct fscrypt_nokey_name for details. * * Return: 0 on success, -errno on failure */ int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); struct fscrypt_nokey_name nokey_name; u32 size; /* size of the unencoded no-key name */ if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; oname->name[iname->len - 1] = '.'; oname->len = iname->len; return 0; } if (iname->len < FSCRYPT_FNAME_MIN_MSG_LEN) return -EUCLEAN; if (fscrypt_has_encryption_key(inode)) return fname_decrypt(inode, iname, oname); /* * Sanity check that struct fscrypt_nokey_name doesn't have padding * between fields and that its encoded size never exceeds NAME_MAX. */ BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, dirhash) != offsetof(struct fscrypt_nokey_name, bytes)); BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) != offsetof(struct fscrypt_nokey_name, sha256)); BUILD_BUG_ON(FSCRYPT_NOKEY_NAME_MAX_ENCODED > NAME_MAX); nokey_name.dirhash[0] = hash; nokey_name.dirhash[1] = minor_hash; if (iname->len <= sizeof(nokey_name.bytes)) { memcpy(nokey_name.bytes, iname->name, iname->len); size = offsetof(struct fscrypt_nokey_name, bytes[iname->len]); } else { memcpy(nokey_name.bytes, iname->name, sizeof(nokey_name.bytes)); /* Compute strong hash of remaining part of name. 
*/ sha256(&iname->name[sizeof(nokey_name.bytes)], iname->len - sizeof(nokey_name.bytes), nokey_name.sha256); size = FSCRYPT_NOKEY_NAME_MAX; } oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size, oname->name); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); /** * fscrypt_setup_filename() - prepare to search a possibly encrypted directory * @dir: the directory that will be searched * @iname: the user-provided filename being searched for * @lookup: 1 if we're allowed to proceed without the key because it's * ->lookup() or we're finding the dir_entry for deletion; 0 if we cannot * proceed without the key because we're going to create the dir_entry. * @fname: the filename information to be filled in * * Given a user-provided filename @iname, this function sets @fname->disk_name * to the name that would be stored in the on-disk directory entry, if possible. * If the directory is unencrypted this is simply @iname. Else, if we have the * directory's encryption key, then @iname is the plaintext, so we encrypt it to * get the disk_name. * * Else, for keyless @lookup operations, @iname should be a no-key name, so we * decode it to get the struct fscrypt_nokey_name. Non-@lookup operations will * be impossible in this case, so we fail them with ENOKEY. * * If successful, fscrypt_free_filename() must be called later to clean up. 
* * Return: 0 on success, -errno on failure */ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { struct fscrypt_nokey_name *nokey_name; int ret; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; if (!IS_ENCRYPTED(dir) || fscrypt_is_dot_dotdot(iname)) { fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; } ret = fscrypt_get_encryption_info(dir, lookup); if (ret) return ret; if (fscrypt_has_encryption_key(dir)) { if (!fscrypt_fname_encrypted_size(dir, iname->len, NAME_MAX, &fname->crypto_buf.len)) return -ENAMETOOLONG; fname->crypto_buf.name = kmalloc(fname->crypto_buf.len, GFP_NOFS); if (!fname->crypto_buf.name) return -ENOMEM; ret = fscrypt_fname_encrypt(dir, iname, fname->crypto_buf.name, fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; return 0; } if (!lookup) return -ENOKEY; fname->is_nokey_name = true; /* * We don't have the key and we are doing a lookup; decode the * user-supplied name */ if (iname->len > FSCRYPT_NOKEY_NAME_MAX_ENCODED) return -ENOENT; fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; ret = fscrypt_base64url_decode(iname->name, iname->len, fname->crypto_buf.name); if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || (ret > offsetof(struct fscrypt_nokey_name, sha256) && ret != FSCRYPT_NOKEY_NAME_MAX)) { ret = -ENOENT; goto errout; } fname->crypto_buf.len = ret; nokey_name = (void *)fname->crypto_buf.name; fname->hash = nokey_name->dirhash[0]; fname->minor_hash = nokey_name->dirhash[1]; if (ret != FSCRYPT_NOKEY_NAME_MAX) { /* The full ciphertext filename is available. 
*/ fname->disk_name.name = nokey_name->bytes; fname->disk_name.len = ret - offsetof(struct fscrypt_nokey_name, bytes); } return 0; errout: kfree(fname->crypto_buf.name); return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); /** * fscrypt_match_name() - test whether the given name matches a directory entry * @fname: the name being searched for * @de_name: the name from the directory entry * @de_name_len: the length of @de_name in bytes * * Normally @fname->disk_name will be set, and in that case we simply compare * that to the name stored in the directory entry. The only exception is that * if we don't have the key for an encrypted directory and the name we're * looking for is very long, then we won't have the full disk_name and instead * we'll need to match against a fscrypt_nokey_name that includes a strong hash. * * Return: %true if the name matches, otherwise %false. */ bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { const struct fscrypt_nokey_name *nokey_name = (const void *)fname->crypto_buf.name; u8 digest[SHA256_DIGEST_SIZE]; if (likely(fname->disk_name.name)) { if (de_name_len != fname->disk_name.len) return false; return !memcmp(de_name, fname->disk_name.name, de_name_len); } if (de_name_len <= sizeof(nokey_name->bytes)) return false; if (memcmp(de_name, nokey_name->bytes, sizeof(nokey_name->bytes))) return false; sha256(&de_name[sizeof(nokey_name->bytes)], de_name_len - sizeof(nokey_name->bytes), digest); return !memcmp(digest, nokey_name->sha256, sizeof(digest)); } EXPORT_SYMBOL_GPL(fscrypt_match_name); /** * fscrypt_fname_siphash() - calculate the SipHash of a filename * @dir: the parent directory * @name: the filename to calculate the SipHash of * * Given a plaintext filename @name and a directory @dir which uses SipHash as * its dirhash method and has had its fscrypt key set up, this function * calculates the SipHash of that name using the directory's secret dirhash key. 
* * Return: the SipHash of @name using the hash key of @dir */ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) { const struct fscrypt_inode_info *ci = dir->i_crypt_info; WARN_ON_ONCE(!ci->ci_dirhash_key_initialized); return siphash(name->name, name->len, &ci->ci_dirhash_key); } EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); /* * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. */ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; int err; int valid; /* * Plaintext names are always valid, since fscrypt doesn't support * reverting to no-key names without evicting the directory's inode * -- which implies eviction of the dentries in the directory. */ if (!(dentry->d_flags & DCACHE_NOKEY_NAME)) return 1; /* * No-key name; valid if the directory's key is still unavailable. * * Although fscrypt forbids rename() on no-key names, we still must use * dget_parent() here rather than use ->d_parent directly. That's * because a corrupted fs image may contain directory hard links, which * the VFS handles by moving the directory's dentry tree in the dcache * each time ->lookup() finds the directory and it already has a dentry * elsewhere. Thus ->d_parent can be changing, and we must safely grab * a reference to some ->d_parent to prevent it from being freed. */ if (flags & LOOKUP_RCU) return -ECHILD; dir = dget_parent(dentry); /* * Pass allow_unsupported=true, so that files with an unsupported * encryption policy can be deleted. */ err = fscrypt_get_encryption_info(d_inode(dir), true); valid = !fscrypt_has_encryption_key(d_inode(dir)); dput(dir); if (err < 0) return err; return valid; } EXPORT_SYMBOL_GPL(fscrypt_d_revalidate); |
5 9 2 2 2 3 3 3 1 3 3 3 3 3 3 3 3 2 1 2 2 2 2 4 1 4 1 5 2 2 2 2 1 1 1 3 2 1 2 2 3 2 2 1 1 7 2 1 1 2 3 3 3 3 3 2 1 2 2 2 2 2 4 2 4 1 1 1 1 2 8 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 
1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 
1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include <trace/events/devlink.h> #include "devl_internal.h" struct devlink_stats { u64_stats_t rx_bytes; u64_stats_t rx_packets; struct u64_stats_sync syncp; }; /** * struct devlink_trap_policer_item - Packet trap policer attributes. * @policer: Immutable packet trap policer attributes. * @rate: Rate in packets / sec. * @burst: Burst size in packets. * @list: trap_policer_list member. * * Describes packet trap policer attributes. Created by devlink during trap * policer registration. */ struct devlink_trap_policer_item { const struct devlink_trap_policer *policer; u64 rate; u64 burst; struct list_head list; }; /** * struct devlink_trap_group_item - Packet trap group attributes. * @group: Immutable packet trap group attributes. * @policer_item: Associated policer item. Can be NULL. * @list: trap_group_list member. * @stats: Trap group statistics. * * Describes packet trap group attributes. Created by devlink during trap * group registration. */ struct devlink_trap_group_item { const struct devlink_trap_group *group; struct devlink_trap_policer_item *policer_item; struct list_head list; struct devlink_stats __percpu *stats; }; /** * struct devlink_trap_item - Packet trap attributes. * @trap: Immutable packet trap attributes. * @group_item: Associated group item. * @list: trap_list member. * @action: Trap action. * @stats: Trap statistics. * @priv: Driver private information. * * Describes both mutable and immutable packet trap attributes. 
Created by * devlink during trap registration and used for all trap related operations. */ struct devlink_trap_item { const struct devlink_trap *trap; struct devlink_trap_group_item *group_item; struct list_head list; enum devlink_trap_action action; struct devlink_stats __percpu *stats; void *priv; }; static struct devlink_trap_policer_item * devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id) { struct devlink_trap_policer_item *policer_item; list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { if (policer_item->policer->id == id) return policer_item; } return NULL; } static struct devlink_trap_item * devlink_trap_item_lookup(struct devlink *devlink, const char *name) { struct devlink_trap_item *trap_item; list_for_each_entry(trap_item, &devlink->trap_list, list) { if (!strcmp(trap_item->trap->name, name)) return trap_item; } return NULL; } static struct devlink_trap_item * devlink_trap_item_get_from_info(struct devlink *devlink, struct genl_info *info) { struct nlattr *attr; if (!info->attrs[DEVLINK_ATTR_TRAP_NAME]) return NULL; attr = info->attrs[DEVLINK_ATTR_TRAP_NAME]; return devlink_trap_item_lookup(devlink, nla_data(attr)); } static int devlink_trap_action_get_from_info(struct genl_info *info, enum devlink_trap_action *p_trap_action) { u8 val; val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]); switch (val) { case DEVLINK_TRAP_ACTION_DROP: case DEVLINK_TRAP_ACTION_TRAP: case DEVLINK_TRAP_ACTION_MIRROR: *p_trap_action = val; break; default: return -EINVAL; } return 0; } static int devlink_trap_metadata_put(struct sk_buff *msg, const struct devlink_trap *trap) { struct nlattr *attr; attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA); if (!attr) return -EMSGSIZE; if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) && nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT)) goto nla_put_failure; if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) && nla_put_flag(msg, 
DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, struct devlink_stats *stats) { int i; memset(stats, 0, sizeof(*stats)); for_each_possible_cpu(i) { struct devlink_stats *cpu_stats; u64 rx_packets, rx_bytes; unsigned int start; cpu_stats = per_cpu_ptr(trap_stats, i); do { start = u64_stats_fetch_begin(&cpu_stats->syncp); rx_packets = u64_stats_read(&cpu_stats->rx_packets); rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->rx_packets, rx_packets); u64_stats_add(&stats->rx_bytes, rx_bytes); } } static int devlink_trap_group_stats_put(struct sk_buff *msg, struct devlink_stats __percpu *trap_stats) { struct devlink_stats stats; struct nlattr *attr; devlink_trap_stats_read(trap_stats, &stats); attr = nla_nest_start(msg, DEVLINK_ATTR_STATS); if (!attr) return -EMSGSIZE; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, u64_stats_read(&stats.rx_packets), DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, u64_stats_read(&stats.rx_bytes), DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink, const struct devlink_trap_item *trap_item) { struct devlink_stats stats; struct nlattr *attr; u64 drops = 0; int err; if (devlink->ops->trap_drop_counter_get) { err = devlink->ops->trap_drop_counter_get(devlink, trap_item->trap, &drops); if (err) return err; } devlink_trap_stats_read(trap_item->stats, &stats); attr = nla_nest_start(msg, DEVLINK_ATTR_STATS); if (!attr) return -EMSGSIZE; if (devlink->ops->trap_drop_counter_get && nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops, 
DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, u64_stats_read(&stats.rx_packets), DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, u64_stats_read(&stats.rx_bytes), DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink, const struct devlink_trap_item *trap_item, enum devlink_command cmd, u32 portid, u32 seq, int flags) { struct devlink_trap_group_item *group_item = trap_item->group_item; void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME, group_item->group->name)) goto nla_put_failure; if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type)) goto nla_put_failure; if (trap_item->trap->generic && nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action)) goto nla_put_failure; err = devlink_trap_metadata_put(msg, trap_item->trap); if (err) goto nla_put_failure; err = devlink_trap_stats_put(msg, devlink, trap_item); if (err) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; struct devlink_trap_item *trap_item; struct sk_buff *msg; int err; if (list_empty(&devlink->trap_list)) return -EOPNOTSUPP; trap_item = devlink_trap_item_get_from_info(devlink, info); if (!trap_item) { NL_SET_ERR_MSG(extack, "Device did not register this 
trap"); return -ENOENT; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_trap_fill(msg, devlink, trap_item, DEVLINK_CMD_TRAP_NEW, info->snd_portid, info->snd_seq, 0); if (err) goto err_trap_fill; return genlmsg_reply(msg, info); err_trap_fill: nlmsg_free(msg); return err; } static int devlink_nl_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_trap_item *trap_item; int idx = 0; int err = 0; list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < state->idx) { idx++; continue; } err = devlink_nl_trap_fill(msg, devlink, trap_item, DEVLINK_CMD_TRAP_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err) { state->idx = idx; break; } idx++; } return err; } int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_trap_get_dump_one); } static int __devlink_trap_action_set(struct devlink *devlink, struct devlink_trap_item *trap_item, enum devlink_trap_action trap_action, struct netlink_ext_ack *extack) { int err; if (trap_item->action != trap_action && trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) { NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. 
Skipping"); return 0; } err = devlink->ops->trap_action_set(devlink, trap_item->trap, trap_action, extack); if (err) return err; trap_item->action = trap_action; return 0; } static int devlink_trap_action_set(struct devlink *devlink, struct devlink_trap_item *trap_item, struct genl_info *info) { enum devlink_trap_action trap_action; int err; if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION]) return 0; err = devlink_trap_action_get_from_info(info, &trap_action); if (err) { NL_SET_ERR_MSG(info->extack, "Invalid trap action"); return -EINVAL; } return __devlink_trap_action_set(devlink, trap_item, trap_action, info->extack); } int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; struct devlink_trap_item *trap_item; if (list_empty(&devlink->trap_list)) return -EOPNOTSUPP; trap_item = devlink_trap_item_get_from_info(devlink, info); if (!trap_item) { NL_SET_ERR_MSG(extack, "Device did not register this trap"); return -ENOENT; } return devlink_trap_action_set(devlink, trap_item, info); } static struct devlink_trap_group_item * devlink_trap_group_item_lookup(struct devlink *devlink, const char *name) { struct devlink_trap_group_item *group_item; list_for_each_entry(group_item, &devlink->trap_group_list, list) { if (!strcmp(group_item->group->name, name)) return group_item; } return NULL; } static struct devlink_trap_group_item * devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id) { struct devlink_trap_group_item *group_item; list_for_each_entry(group_item, &devlink->trap_group_list, list) { if (group_item->group->id == id) return group_item; } return NULL; } static struct devlink_trap_group_item * devlink_trap_group_item_get_from_info(struct devlink *devlink, struct genl_info *info) { char *name; if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]) return NULL; name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]); return 
devlink_trap_group_item_lookup(devlink, name); } static int devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink, const struct devlink_trap_group_item *group_item, enum devlink_command cmd, u32 portid, u32 seq, int flags) { void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME, group_item->group->name)) goto nla_put_failure; if (group_item->group->generic && nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC)) goto nla_put_failure; if (group_item->policer_item && nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID, group_item->policer_item->policer->id)) goto nla_put_failure; err = devlink_trap_group_stats_put(msg, group_item->stats); if (err) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; struct devlink_trap_group_item *group_item; struct sk_buff *msg; int err; if (list_empty(&devlink->trap_group_list)) return -EOPNOTSUPP; group_item = devlink_trap_group_item_get_from_info(devlink, info); if (!group_item) { NL_SET_ERR_MSG(extack, "Device did not register this trap group"); return -ENOENT; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_trap_group_fill(msg, devlink, group_item, DEVLINK_CMD_TRAP_GROUP_NEW, info->snd_portid, info->snd_seq, 0); if (err) goto err_trap_group_fill; return genlmsg_reply(msg, info); err_trap_group_fill: nlmsg_free(msg); return err; } static int devlink_nl_trap_group_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_trap_group_item *group_item; int 
idx = 0; int err = 0; list_for_each_entry(group_item, &devlink->trap_group_list, list) { if (idx < state->idx) { idx++; continue; } err = devlink_nl_trap_group_fill(msg, devlink, group_item, DEVLINK_CMD_TRAP_GROUP_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err) { state->idx = idx; break; } idx++; } return err; } int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_trap_group_get_dump_one); } static int __devlink_trap_group_action_set(struct devlink *devlink, struct devlink_trap_group_item *group_item, enum devlink_trap_action trap_action, struct netlink_ext_ack *extack) { const char *group_name = group_item->group->name; struct devlink_trap_item *trap_item; int err; if (devlink->ops->trap_group_action_set) { err = devlink->ops->trap_group_action_set(devlink, group_item->group, trap_action, extack); if (err) return err; list_for_each_entry(trap_item, &devlink->trap_list, list) { if (strcmp(trap_item->group_item->group->name, group_name)) continue; if (trap_item->action != trap_action && trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) continue; trap_item->action = trap_action; } return 0; } list_for_each_entry(trap_item, &devlink->trap_list, list) { if (strcmp(trap_item->group_item->group->name, group_name)) continue; err = __devlink_trap_action_set(devlink, trap_item, trap_action, extack); if (err) return err; } return 0; } static int devlink_trap_group_action_set(struct devlink *devlink, struct devlink_trap_group_item *group_item, struct genl_info *info, bool *p_modified) { enum devlink_trap_action trap_action; int err; if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION]) return 0; err = devlink_trap_action_get_from_info(info, &trap_action); if (err) { NL_SET_ERR_MSG(info->extack, "Invalid trap action"); return -EINVAL; } err = __devlink_trap_group_action_set(devlink, group_item, trap_action, info->extack); if (err) return err; *p_modified = true; return 0; } static int 
devlink_trap_group_set(struct devlink *devlink, struct devlink_trap_group_item *group_item, struct genl_info *info) { struct devlink_trap_policer_item *policer_item; struct netlink_ext_ack *extack = info->extack; const struct devlink_trap_policer *policer; struct nlattr **attrs = info->attrs; u32 policer_id; int err; if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID]) return 0; if (!devlink->ops->trap_group_set) return -EOPNOTSUPP; policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]); policer_item = devlink_trap_policer_item_lookup(devlink, policer_id); if (policer_id && !policer_item) { NL_SET_ERR_MSG(extack, "Device did not register this trap policer"); return -ENOENT; } policer = policer_item ? policer_item->policer : NULL; err = devlink->ops->trap_group_set(devlink, group_item->group, policer, extack); if (err) return err; group_item->policer_item = policer_item; return 0; } int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; struct devlink_trap_group_item *group_item; bool modified = false; int err; if (list_empty(&devlink->trap_group_list)) return -EOPNOTSUPP; group_item = devlink_trap_group_item_get_from_info(devlink, info); if (!group_item) { NL_SET_ERR_MSG(extack, "Device did not register this trap group"); return -ENOENT; } err = devlink_trap_group_action_set(devlink, group_item, info, &modified); if (err) return err; err = devlink_trap_group_set(devlink, group_item, info); if (err) goto err_trap_group_set; return 0; err_trap_group_set: if (modified) NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already"); return err; } static struct devlink_trap_policer_item * devlink_trap_policer_item_get_from_info(struct devlink *devlink, struct genl_info *info) { u32 id; if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]) return NULL; id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]); return 
devlink_trap_policer_item_lookup(devlink, id); } static int devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink, const struct devlink_trap_policer *policer) { struct nlattr *attr; u64 drops; int err; if (!devlink->ops->trap_policer_counter_get) return 0; err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops); if (err) return err; attr = nla_nest_start(msg, DEVLINK_ATTR_STATS); if (!attr) return -EMSGSIZE; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops, DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink, const struct devlink_trap_policer_item *policer_item, enum devlink_command cmd, u32 portid, u32 seq, int flags) { void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID, policer_item->policer->id)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE, policer_item->rate, DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST, policer_item->burst, DEVLINK_ATTR_PAD)) goto nla_put_failure; err = devlink_trap_policer_stats_put(msg, devlink, policer_item->policer); if (err) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } int devlink_nl_trap_policer_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_trap_policer_item *policer_item; struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; if (list_empty(&devlink->trap_policer_list)) return -EOPNOTSUPP; policer_item = devlink_trap_policer_item_get_from_info(devlink, info); if (!policer_item) { 
NL_SET_ERR_MSG(extack, "Device did not register this trap policer"); return -ENOENT; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_NEW, info->snd_portid, info->snd_seq, 0); if (err) goto err_trap_policer_fill; return genlmsg_reply(msg, info); err_trap_policer_fill: nlmsg_free(msg); return err; } static int devlink_nl_trap_policer_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_trap_policer_item *policer_item; int idx = 0; int err = 0; list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { if (idx < state->idx) { idx++; continue; } err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err) { state->idx = idx; break; } idx++; } return err; } int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_trap_policer_get_dump_one); } static int devlink_trap_policer_set(struct devlink *devlink, struct devlink_trap_policer_item *policer_item, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct nlattr **attrs = info->attrs; u64 rate, burst; int err; rate = policer_item->rate; burst = policer_item->burst; if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]) rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]); if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]) burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]); if (rate < policer_item->policer->min_rate) { NL_SET_ERR_MSG(extack, "Policer rate lower than limit"); return -EINVAL; } if (rate > policer_item->policer->max_rate) { NL_SET_ERR_MSG(extack, "Policer rate higher than limit"); return -EINVAL; } if (burst < policer_item->policer->min_burst) { NL_SET_ERR_MSG(extack, 
"Policer burst size lower than limit"); return -EINVAL; } if (burst > policer_item->policer->max_burst) { NL_SET_ERR_MSG(extack, "Policer burst size higher than limit"); return -EINVAL; } err = devlink->ops->trap_policer_set(devlink, policer_item->policer, rate, burst, info->extack); if (err) return err; policer_item->rate = rate; policer_item->burst = burst; return 0; } int devlink_nl_trap_policer_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_trap_policer_item *policer_item; struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; if (list_empty(&devlink->trap_policer_list)) return -EOPNOTSUPP; if (!devlink->ops->trap_policer_set) return -EOPNOTSUPP; policer_item = devlink_trap_policer_item_get_from_info(devlink, info); if (!policer_item) { NL_SET_ERR_MSG(extack, "Device did not register this trap policer"); return -ENOENT; } return devlink_trap_policer_set(devlink, policer_item, info); } #define DEVLINK_TRAP(_id, _type) \ { \ .type = DEVLINK_TRAP_TYPE_##_type, \ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \ } static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(SMAC_MC, DROP), DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP), DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP), DEVLINK_TRAP(INGRESS_STP_FILTER, DROP), DEVLINK_TRAP(EMPTY_TX_LIST, DROP), DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP), DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP), DEVLINK_TRAP(TTL_ERROR, EXCEPTION), DEVLINK_TRAP(TAIL_DROP, DROP), DEVLINK_TRAP(NON_IP_PACKET, DROP), DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP), DEVLINK_TRAP(DIP_LB, DROP), DEVLINK_TRAP(SIP_MC, DROP), DEVLINK_TRAP(SIP_LB, DROP), DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP), DEVLINK_TRAP(IPV4_SIP_BC, DROP), DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP), DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP), DEVLINK_TRAP(MTU_ERROR, EXCEPTION), DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION), DEVLINK_TRAP(RPF, EXCEPTION), DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION), 
DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION), DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION), DEVLINK_TRAP(NON_ROUTABLE, DROP), DEVLINK_TRAP(DECAP_ERROR, EXCEPTION), DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP), DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP), DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP), DEVLINK_TRAP(STP, CONTROL), DEVLINK_TRAP(LACP, CONTROL), DEVLINK_TRAP(LLDP, CONTROL), DEVLINK_TRAP(IGMP_QUERY, CONTROL), DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL), DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL), DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL), DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL), DEVLINK_TRAP(MLD_QUERY, CONTROL), DEVLINK_TRAP(MLD_V1_REPORT, CONTROL), DEVLINK_TRAP(MLD_V2_REPORT, CONTROL), DEVLINK_TRAP(MLD_V1_DONE, CONTROL), DEVLINK_TRAP(IPV4_DHCP, CONTROL), DEVLINK_TRAP(IPV6_DHCP, CONTROL), DEVLINK_TRAP(ARP_REQUEST, CONTROL), DEVLINK_TRAP(ARP_RESPONSE, CONTROL), DEVLINK_TRAP(ARP_OVERLAY, CONTROL), DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL), DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL), DEVLINK_TRAP(IPV4_BFD, CONTROL), DEVLINK_TRAP(IPV6_BFD, CONTROL), DEVLINK_TRAP(IPV4_OSPF, CONTROL), DEVLINK_TRAP(IPV6_OSPF, CONTROL), DEVLINK_TRAP(IPV4_BGP, CONTROL), DEVLINK_TRAP(IPV6_BGP, CONTROL), DEVLINK_TRAP(IPV4_VRRP, CONTROL), DEVLINK_TRAP(IPV6_VRRP, CONTROL), DEVLINK_TRAP(IPV4_PIM, CONTROL), DEVLINK_TRAP(IPV6_PIM, CONTROL), DEVLINK_TRAP(UC_LB, CONTROL), DEVLINK_TRAP(LOCAL_ROUTE, CONTROL), DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL), DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL), DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL), DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL), DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL), DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL), DEVLINK_TRAP(IPV6_REDIRECT, CONTROL), DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL), DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL), DEVLINK_TRAP(PTP_EVENT, CONTROL), DEVLINK_TRAP(PTP_GENERAL, CONTROL), DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL), DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL), DEVLINK_TRAP(EARLY_DROP, DROP), DEVLINK_TRAP(VXLAN_PARSING, DROP), 
DEVLINK_TRAP(LLC_SNAP_PARSING, DROP), DEVLINK_TRAP(VLAN_PARSING, DROP), DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP), DEVLINK_TRAP(MPLS_PARSING, DROP), DEVLINK_TRAP(ARP_PARSING, DROP), DEVLINK_TRAP(IP_1_PARSING, DROP), DEVLINK_TRAP(IP_N_PARSING, DROP), DEVLINK_TRAP(GRE_PARSING, DROP), DEVLINK_TRAP(UDP_PARSING, DROP), DEVLINK_TRAP(TCP_PARSING, DROP), DEVLINK_TRAP(IPSEC_PARSING, DROP), DEVLINK_TRAP(SCTP_PARSING, DROP), DEVLINK_TRAP(DCCP_PARSING, DROP), DEVLINK_TRAP(GTP_PARSING, DROP), DEVLINK_TRAP(ESP_PARSING, DROP), DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP), DEVLINK_TRAP(DMAC_FILTER, DROP), DEVLINK_TRAP(EAPOL, CONTROL), DEVLINK_TRAP(LOCKED_PORT, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ { \ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \ } static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(L2_DROPS), DEVLINK_TRAP_GROUP(L3_DROPS), DEVLINK_TRAP_GROUP(L3_EXCEPTIONS), DEVLINK_TRAP_GROUP(BUFFER_DROPS), DEVLINK_TRAP_GROUP(TUNNEL_DROPS), DEVLINK_TRAP_GROUP(ACL_DROPS), DEVLINK_TRAP_GROUP(STP), DEVLINK_TRAP_GROUP(LACP), DEVLINK_TRAP_GROUP(LLDP), DEVLINK_TRAP_GROUP(MC_SNOOPING), DEVLINK_TRAP_GROUP(DHCP), DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY), DEVLINK_TRAP_GROUP(BFD), DEVLINK_TRAP_GROUP(OSPF), DEVLINK_TRAP_GROUP(BGP), DEVLINK_TRAP_GROUP(VRRP), DEVLINK_TRAP_GROUP(PIM), DEVLINK_TRAP_GROUP(UC_LB), DEVLINK_TRAP_GROUP(LOCAL_DELIVERY), DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY), DEVLINK_TRAP_GROUP(IPV6), DEVLINK_TRAP_GROUP(PTP_EVENT), DEVLINK_TRAP_GROUP(PTP_GENERAL), DEVLINK_TRAP_GROUP(ACL_SAMPLE), DEVLINK_TRAP_GROUP(ACL_TRAP), DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS), DEVLINK_TRAP_GROUP(EAPOL), }; static int devlink_trap_generic_verify(const struct devlink_trap *trap) { if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX) return -EINVAL; if (strcmp(trap->name, devlink_trap_generic[trap->id].name)) return -EINVAL; if (trap->type != devlink_trap_generic[trap->id].type) return -EINVAL; return 0; } static int 
devlink_trap_driver_verify(const struct devlink_trap *trap) { int i; if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX) return -EINVAL; for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) { if (!strcmp(trap->name, devlink_trap_generic[i].name)) return -EEXIST; } return 0; } static int devlink_trap_verify(const struct devlink_trap *trap) { if (!trap || !trap->name) return -EINVAL; if (trap->generic) return devlink_trap_generic_verify(trap); else return devlink_trap_driver_verify(trap); } static int devlink_trap_group_generic_verify(const struct devlink_trap_group *group) { if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX) return -EINVAL; if (strcmp(group->name, devlink_trap_group_generic[group->id].name)) return -EINVAL; return 0; } static int devlink_trap_group_driver_verify(const struct devlink_trap_group *group) { int i; if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX) return -EINVAL; for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) { if (!strcmp(group->name, devlink_trap_group_generic[i].name)) return -EEXIST; } return 0; } static int devlink_trap_group_verify(const struct devlink_trap_group *group) { if (group->generic) return devlink_trap_group_generic_verify(group); else return devlink_trap_group_driver_verify(group); } static void devlink_trap_group_notify(struct devlink *devlink, const struct devlink_trap_group_item *group_item, enum devlink_command cmd) { struct sk_buff *msg; int err; WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && cmd != DEVLINK_CMD_TRAP_GROUP_DEL); if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } void devlink_trap_groups_notify_register(struct devlink *devlink) { struct devlink_trap_group_item *group_item; 
list_for_each_entry(group_item, &devlink->trap_group_list, list) devlink_trap_group_notify(devlink, group_item, DEVLINK_CMD_TRAP_GROUP_NEW); } void devlink_trap_groups_notify_unregister(struct devlink *devlink) { struct devlink_trap_group_item *group_item; list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list) devlink_trap_group_notify(devlink, group_item, DEVLINK_CMD_TRAP_GROUP_DEL); } static int devlink_trap_item_group_link(struct devlink *devlink, struct devlink_trap_item *trap_item) { u16 group_id = trap_item->trap->init_group_id; struct devlink_trap_group_item *group_item; group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id); if (WARN_ON_ONCE(!group_item)) return -EINVAL; trap_item->group_item = group_item; return 0; } static void devlink_trap_notify(struct devlink *devlink, const struct devlink_trap_item *trap_item, enum devlink_command cmd) { struct sk_buff *msg; int err; WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && cmd != DEVLINK_CMD_TRAP_DEL); if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } void devlink_traps_notify_register(struct devlink *devlink) { struct devlink_trap_item *trap_item; list_for_each_entry(trap_item, &devlink->trap_list, list) devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW); } void devlink_traps_notify_unregister(struct devlink *devlink) { struct devlink_trap_item *trap_item; list_for_each_entry_reverse(trap_item, &devlink->trap_list, list) devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL); } static int devlink_trap_register(struct devlink *devlink, const struct devlink_trap *trap, void *priv) { struct devlink_trap_item *trap_item; int err; if (devlink_trap_item_lookup(devlink, 
trap->name)) return -EEXIST; trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL); if (!trap_item) return -ENOMEM; trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); if (!trap_item->stats) { err = -ENOMEM; goto err_stats_alloc; } trap_item->trap = trap; trap_item->action = trap->init_action; trap_item->priv = priv; err = devlink_trap_item_group_link(devlink, trap_item); if (err) goto err_group_link; err = devlink->ops->trap_init(devlink, trap, trap_item); if (err) goto err_trap_init; list_add_tail(&trap_item->list, &devlink->trap_list); devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW); return 0; err_trap_init: err_group_link: free_percpu(trap_item->stats); err_stats_alloc: kfree(trap_item); return err; } static void devlink_trap_unregister(struct devlink *devlink, const struct devlink_trap *trap) { struct devlink_trap_item *trap_item; trap_item = devlink_trap_item_lookup(devlink, trap->name); if (WARN_ON_ONCE(!trap_item)) return; devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL); list_del(&trap_item->list); if (devlink->ops->trap_fini) devlink->ops->trap_fini(devlink, trap, trap_item); free_percpu(trap_item->stats); kfree(trap_item); } static void devlink_trap_disable(struct devlink *devlink, const struct devlink_trap *trap) { struct devlink_trap_item *trap_item; trap_item = devlink_trap_item_lookup(devlink, trap->name); if (WARN_ON_ONCE(!trap_item)) return; devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP, NULL); trap_item->action = DEVLINK_TRAP_ACTION_DROP; } /** * devl_traps_register - Register packet traps with devlink. * @devlink: devlink. * @traps: Packet traps. * @traps_count: Count of provided packet traps. * @priv: Driver private information. * * Return: Non-zero value on failure. 
*/ int devl_traps_register(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count, void *priv) { int i, err; /* Both driver ops are required: trap_init() is called on registration and trap_action_set() when traps are disabled. */ if (!devlink->ops->trap_init || !devlink->ops->trap_action_set) return -EINVAL; devl_assert_locked(devlink); for (i = 0; i < traps_count; i++) { const struct devlink_trap *trap = &traps[i]; err = devlink_trap_verify(trap); if (err) goto err_trap_verify; err = devlink_trap_register(devlink, trap, priv); if (err) goto err_trap_register; } return 0; /* Unwind: traps[i] was not registered, so unregister traps[i - 1] down to traps[0]. */ err_trap_register: err_trap_verify: for (i--; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); return err; } EXPORT_SYMBOL_GPL(devl_traps_register); /** * devlink_traps_register - Register packet traps with devlink. * @devlink: devlink. * @traps: Packet traps. * @traps_count: Count of provided packet traps. * @priv: Driver private information. * * Context: Takes and releases devlink->lock <mutex>. * * Return: Non-zero value on failure. */ int devlink_traps_register(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count, void *priv) { int err; devl_lock(devlink); err = devl_traps_register(devlink, traps, traps_count, priv); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_traps_register); /** * devl_traps_unregister - Unregister packet traps from devlink. * @devlink: devlink. * @traps: Packet traps. * @traps_count: Count of provided packet traps. */ void devl_traps_unregister(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count) { int i; devl_assert_locked(devlink); /* Make sure we do not have any packets in-flight while unregistering * traps by disabling all of them and waiting for a grace period. */ for (i = traps_count - 1; i >= 0; i--) devlink_trap_disable(devlink, &traps[i]); synchronize_rcu(); /* Second pass, again from the last array entry to the first, frees the trap items. */ for (i = traps_count - 1; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); } EXPORT_SYMBOL_GPL(devl_traps_unregister); /** * devlink_traps_unregister - Unregister packet traps from devlink. * @devlink: devlink.
* @traps: Packet traps. * @traps_count: Count of provided packet traps. * * Context: Takes and releases devlink->lock <mutex>. */ void devlink_traps_unregister(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count) { devl_lock(devlink); devl_traps_unregister(devlink, traps, traps_count); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_traps_unregister); /* Add one packet of @skb_len bytes to the current CPU's instance of @trap_stats, inside a u64_stats update section. */ static void devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, size_t skb_len) { struct devlink_stats *stats; stats = this_cpu_ptr(trap_stats); u64_stats_update_begin(&stats->syncp); u64_stats_add(&stats->rx_bytes, skb_len); u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } /* Fill @metadata with the trap name, trap group name, trap type and @fa_cookie. input_dev is written only when the port type, read under the port's type_lock, is DEVLINK_PORT_TYPE_ETH; otherwise the field keeps whatever the caller put there. @in_devlink_port is dereferenced unconditionally. */ static void devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata, const struct devlink_trap_item *trap_item, struct devlink_port *in_devlink_port, const struct flow_action_cookie *fa_cookie) { metadata->trap_name = trap_item->trap->name; metadata->trap_group_name = trap_item->group_item->group->name; metadata->fa_cookie = fa_cookie; metadata->trap_type = trap_item->trap->type; spin_lock(&in_devlink_port->type_lock); if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH) metadata->input_dev = in_devlink_port->type_eth.netdev; spin_unlock(&in_devlink_port->type_lock); } /** * devlink_trap_report - Report trapped packet to drop monitor. * @devlink: devlink. * @skb: Trapped packet. * @trap_ctx: Trap context. * @in_devlink_port: Input devlink port. * @fa_cookie: Flow action cookie. Could be NULL.
*/ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, void *trap_ctx, struct devlink_port *in_devlink_port, const struct flow_action_cookie *fa_cookie) { struct devlink_trap_item *trap_item = trap_ctx; /* Count the packet against both the trap and the group it is linked to. */ devlink_trap_stats_update(trap_item->stats, skb->len); devlink_trap_stats_update(trap_item->group_item->stats, skb->len); /* Metadata is only assembled when the tracepoint is enabled. */ if (tracepoint_enabled(devlink_trap_report)) { struct devlink_trap_metadata metadata = {}; devlink_trap_report_metadata_set(&metadata, trap_item, in_devlink_port, fa_cookie); trace_devlink_trap_report(devlink, skb, &metadata); } } EXPORT_SYMBOL_GPL(devlink_trap_report); /** * devlink_trap_ctx_priv - Trap context to driver private information. * @trap_ctx: Trap context. * * Return: Driver private information passed during registration. */ void *devlink_trap_ctx_priv(void *trap_ctx) { struct devlink_trap_item *trap_item = trap_ctx; return trap_item->priv; } EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv); /* Bind @group_item to the policer item named by the group's init_policer_id. An id of 0 links nothing and returns 0; a non-zero id that cannot be looked up warns once and returns -EINVAL. */ static int devlink_trap_group_item_policer_link(struct devlink *devlink, struct devlink_trap_group_item *group_item) { u32 policer_id = group_item->group->init_policer_id; struct devlink_trap_policer_item *policer_item; if (policer_id == 0) return 0; policer_item = devlink_trap_policer_item_lookup(devlink, policer_id); if (WARN_ON_ONCE(!policer_item)) return -EINVAL; group_item->policer_item = policer_item; return 0; } /* Create the group item for @group: a name that is already registered yields -EEXIST; otherwise allocate the item and its per-CPU stats, link the initial policer, call the driver's trap_group_init() op if it is set, append the item to devlink->trap_group_list and send DEVLINK_CMD_TRAP_GROUP_NEW. On failure the stats and the item are freed and the error is returned. */ static int devlink_trap_group_register(struct devlink *devlink, const struct devlink_trap_group *group) { struct devlink_trap_group_item *group_item; int err; if (devlink_trap_group_item_lookup(devlink, group->name)) return -EEXIST; group_item = kzalloc(sizeof(*group_item), GFP_KERNEL); if (!group_item) return -ENOMEM; group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); if (!group_item->stats) { err = -ENOMEM; goto err_stats_alloc; } group_item->group = group; err = devlink_trap_group_item_policer_link(devlink, group_item); if (err) goto err_policer_link; if (devlink->ops->trap_group_init) { err =
devlink->ops->trap_group_init(devlink, group); if (err) goto err_group_init; } list_add_tail(&group_item->list, &devlink->trap_group_list); devlink_trap_group_notify(devlink, group_item, DEVLINK_CMD_TRAP_GROUP_NEW); return 0; err_group_init: err_policer_link: free_percpu(group_item->stats); err_stats_alloc: kfree(group_item); return err; } /* Tear down the group item registered for @group: send DEVLINK_CMD_TRAP_GROUP_DEL, unlink the item and free its stats and the item itself. No driver op is called here. Warns once and returns if no item matches group->name. */ static void devlink_trap_group_unregister(struct devlink *devlink, const struct devlink_trap_group *group) { struct devlink_trap_group_item *group_item; group_item = devlink_trap_group_item_lookup(devlink, group->name); if (WARN_ON_ONCE(!group_item)) return; devlink_trap_group_notify(devlink, group_item, DEVLINK_CMD_TRAP_GROUP_DEL); list_del(&group_item->list); free_percpu(group_item->stats); kfree(group_item); } /** * devl_trap_groups_register - Register packet trap groups with devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. * * Return: Non-zero value on failure. */ int devl_trap_groups_register(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count) { int i, err; devl_assert_locked(devlink); for (i = 0; i < groups_count; i++) { const struct devlink_trap_group *group = &groups[i]; err = devlink_trap_group_verify(group); if (err) goto err_trap_group_verify; err = devlink_trap_group_register(devlink, group); if (err) goto err_trap_group_register; } return 0; /* Unwind: groups[i] was not registered, so unregister groups[i - 1] down to groups[0]. */ err_trap_group_register: err_trap_group_verify: for (i--; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); return err; } EXPORT_SYMBOL_GPL(devl_trap_groups_register); /** * devlink_trap_groups_register - Register packet trap groups with devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. * * Context: Takes and releases devlink->lock <mutex>. * * Return: Non-zero value on failure.
*/ int devlink_trap_groups_register(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count) { int err; devl_lock(devlink); err = devl_trap_groups_register(devlink, groups, groups_count); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_trap_groups_register); /** * devl_trap_groups_unregister - Unregister packet trap groups from devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. */ void devl_trap_groups_unregister(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count) { int i; devl_assert_locked(devlink); /* Walk the array from its last entry down to the first. */ for (i = groups_count - 1; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); } EXPORT_SYMBOL_GPL(devl_trap_groups_unregister); /** * devlink_trap_groups_unregister - Unregister packet trap groups from devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. * * Context: Takes and releases devlink->lock <mutex>.
*/ void devlink_trap_groups_unregister(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count) { devl_lock(devlink); devl_trap_groups_unregister(devlink, groups, groups_count); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister); /* Fill a netlink message for @policer_item with devlink_nl_trap_policer_fill() and multicast it to DEVLINK_MCGRP_CONFIG. Returns without sending when the devlink index lacks the DEVLINK_REGISTERED mark in devlinks, when the message cannot be allocated, or when filling it fails. A @cmd other than DEVLINK_CMD_TRAP_POLICER_NEW/DEVLINK_CMD_TRAP_POLICER_DEL only triggers a one-time warning. */ static void devlink_trap_policer_notify(struct devlink *devlink, const struct devlink_trap_policer_item *policer_item, enum devlink_command cmd) { struct sk_buff *msg; int err; WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW && cmd != DEVLINK_CMD_TRAP_POLICER_DEL); if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } /* Send DEVLINK_CMD_TRAP_POLICER_NEW for each item on devlink->trap_policer_list, in list order. */ void devlink_trap_policers_notify_register(struct devlink *devlink) { struct devlink_trap_policer_item *policer_item; list_for_each_entry(policer_item, &devlink->trap_policer_list, list) devlink_trap_policer_notify(devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_NEW); } /* Send DEVLINK_CMD_TRAP_POLICER_DEL for each item on devlink->trap_policer_list, walking the list in reverse. */ void devlink_trap_policers_notify_unregister(struct devlink *devlink) { struct devlink_trap_policer_item *policer_item; list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list, list) devlink_trap_policer_notify(devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_DEL); } /* Create the policer item for @policer: an id that is already registered yields -EEXIST; otherwise allocate the item, seed rate and burst from init_rate/init_burst, call the driver's trap_policer_init() op if it is set, append the item to devlink->trap_policer_list and send DEVLINK_CMD_TRAP_POLICER_NEW. On failure the item is freed and the error is returned. */ static int devlink_trap_policer_register(struct devlink *devlink, const struct devlink_trap_policer *policer) { struct devlink_trap_policer_item *policer_item; int err; if (devlink_trap_policer_item_lookup(devlink, policer->id)) return -EEXIST; policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL); if (!policer_item) return -ENOMEM; policer_item->policer = policer; policer_item->rate = policer->init_rate; policer_item->burst = policer->init_burst; if (devlink->ops->trap_policer_init) { err =
devlink->ops->trap_policer_init(devlink, policer); if (err) goto err_policer_init; } list_add_tail(&policer_item->list, &devlink->trap_policer_list); devlink_trap_policer_notify(devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_NEW); return 0; err_policer_init: kfree(policer_item); return err; } /* Tear down the policer item registered for @policer: send DEVLINK_CMD_TRAP_POLICER_DEL, unlink the item, call the driver's trap_policer_fini() op if it is set, then free the item. Warns once and returns if no item matches policer->id. */ static void devlink_trap_policer_unregister(struct devlink *devlink, const struct devlink_trap_policer *policer) { struct devlink_trap_policer_item *policer_item; policer_item = devlink_trap_policer_item_lookup(devlink, policer->id); if (WARN_ON_ONCE(!policer_item)) return; devlink_trap_policer_notify(devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_DEL); list_del(&policer_item->list); if (devlink->ops->trap_policer_fini) devlink->ops->trap_policer_fini(devlink, policer); kfree(policer_item); } /** * devl_trap_policers_register - Register packet trap policers with devlink. * @devlink: devlink. * @policers: Packet trap policers. * @policers_count: Count of provided packet trap policers. * * Return: Non-zero value on failure. */ int devl_trap_policers_register(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count) { int i, err; devl_assert_locked(devlink); for (i = 0; i < policers_count; i++) { const struct devlink_trap_policer *policer = &policers[i]; /* A policer needs a non-zero id and min <= max for both rate and burst; anything else warns and fails with -EINVAL. */ if (WARN_ON(policer->id == 0 || policer->max_rate < policer->min_rate || policer->max_burst < policer->min_burst)) { err = -EINVAL; goto err_trap_policer_verify; } err = devlink_trap_policer_register(devlink, policer); if (err) goto err_trap_policer_register; } return 0; /* Unwind: policers[i] was not registered, so unregister policers[i - 1] down to policers[0]. */ err_trap_policer_register: err_trap_policer_verify: for (i--; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); return err; } EXPORT_SYMBOL_GPL(devl_trap_policers_register); /** * devl_trap_policers_unregister - Unregister packet trap policers from devlink. * @devlink: devlink. * @policers: Packet trap policers. * @policers_count: Count of provided packet trap policers.
*/ void devl_trap_policers_unregister(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count) { int i; devl_assert_locked(devlink); /* Walk the array from its last entry down to the first. */ for (i = policers_count - 1; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); } EXPORT_SYMBOL_GPL(devl_trap_policers_unregister); |
2 2 2 3 6 6 3 3 2 3 8 1 5 5 1 1 1 1 3 1758 1758 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2013-2014 Intel Corp. 
*/ #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/module.h> #include <linux/debugfs.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/pkt_sched.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include <net/6lowpan.h> /* for the compression support */ #define VERSION "0.1" static struct dentry *lowpan_enable_debugfs; static struct dentry *lowpan_control_debugfs; #define IFACE_NAME_TEMPLATE "bt%d" /* Per-packet state kept in skb->cb and reached through lowpan_cb(): gw caches the next hop chosen by peer_lookup_dst(), chan is the L2CAP channel picked by setup_header() (NULL for multicast). */ struct skb_cb { struct in6_addr addr; struct in6_addr gw; struct l2cap_chan *chan; }; #define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb)) /* The devices list contains those devices that we are acting * as a proxy. The BT 6LoWPAN device is a virtual device that * connects to the Bluetooth LE device. The real connection to * BT device is done via l2cap layer. There exists one * virtual device / one BT 6LoWPAN network (=hciX device). * The list contains struct lowpan_btle_dev elements.
*/ static LIST_HEAD(bt_6lowpan_devices); /* Taken around additions to and removals from bt_6lowpan_devices and the per-device peers lists. */ static DEFINE_SPINLOCK(devices_lock); static bool enable_6lowpan; /* We are listening incoming connections via this channel */ static struct l2cap_chan *listen_chan; static DEFINE_MUTEX(set_lock); /* One remote device reachable through an L2CAP channel; linked on lowpan_btle_dev.peers and freed through kfree_rcu(). */ struct lowpan_peer { struct list_head list; struct rcu_head rcu; struct l2cap_chan *chan; /* peer addresses in various formats */ unsigned char lladdr[ETH_ALEN]; struct in6_addr peer_addr; }; /* Private data of one 6LoWPAN netdev: the HCI device it belongs to, its peers, and the work items used to announce the interface and to delete it. */ struct lowpan_btle_dev { struct list_head list; struct hci_dev *hdev; struct net_device *netdev; struct list_head peers; atomic_t peer_count; /* number of items in peers list */ struct work_struct delete_netdev; struct delayed_work notify_peers; }; /* Return the lowpan_btle_dev stored in the priv area of @netdev's lowpan_dev(). */ static inline struct lowpan_btle_dev * lowpan_btle_dev(const struct net_device *netdev) { return (struct lowpan_btle_dev *)lowpan_dev(netdev)->priv; } /* Link @peer into dev->peers and bump peer_count. Takes no lock itself. */ static inline void peer_add(struct lowpan_btle_dev *dev, struct lowpan_peer *peer) { list_add_rcu(&peer->list, &dev->peers); atomic_inc(&dev->peer_count); } /* Unlink @peer, free it through kfree_rcu(), drop one module reference and decrement peer_count. Returns true when the count reaches zero. */ static inline bool peer_del(struct lowpan_btle_dev *dev, struct lowpan_peer *peer) { list_del_rcu(&peer->list); kfree_rcu(peer, rcu); module_put(THIS_MODULE); if (atomic_dec_and_test(&dev->peer_count)) { BT_DBG("last peer"); return true; } return false; } /* Return the peer of @dev whose channel is @chan, or NULL. Uses the RCU list iterator without taking rcu_read_lock() itself. */ static inline struct lowpan_peer * __peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan) { struct lowpan_peer *peer; list_for_each_entry_rcu(peer, &dev->peers, list) { if (peer->chan == chan) return peer; } return NULL; } /* Return the first peer of @dev whose channel belongs to @conn, or NULL. Uses the RCU list iterator without taking rcu_read_lock() itself. */ static inline struct lowpan_peer * __peer_lookup_conn(struct lowpan_btle_dev *dev, struct l2cap_conn *conn) { struct lowpan_peer *peer; list_for_each_entry_rcu(peer, &dev->peers, list) { if (peer->chan->conn == conn) return peer; } return NULL; } /* Pick the peer that should receive a packet addressed to @daddr. The next hop is the route's next hop when @skb carries a dst (it is then cached in lowpan_cb(skb)->gw), else the cached gateway if one is set, else @daddr itself. Peers are matched by IPv6 address first, then by the link-layer address found in the neighbour cache. Returns NULL when nothing matches. */ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); int count = atomic_read(&dev->peer_count); const struct in6_addr *nexthop; struct
lowpan_peer *peer; struct neighbour *neigh; BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt); if (!rt) { if (ipv6_addr_any(&lowpan_cb(skb)->gw)) { /* There is neither route nor gateway, * probably the destination is a direct peer. */ nexthop = daddr; } else { /* There is a known gateway */ nexthop = &lowpan_cb(skb)->gw; } } else { nexthop = rt6_nexthop(rt, daddr); /* We need to remember the address because it is needed * by bt_xmit() when sending the packet. In bt_xmit(), the * destination routing info is not set. */ memcpy(&lowpan_cb(skb)->gw, nexthop, sizeof(struct in6_addr)); } BT_DBG("gw %pI6c", nexthop); rcu_read_lock(); /* First pass: exact match on the peer's IPv6 address. */ list_for_each_entry_rcu(peer, &dev->peers, list) { BT_DBG("dst addr %pMR dst type %u ip %pI6c", &peer->chan->dst, peer->chan->dst_type, &peer->peer_addr); if (!ipv6_addr_cmp(&peer->peer_addr, nexthop)) { rcu_read_unlock(); return peer; } } /* use the neighbour cache for matching addresses assigned by SLAAC */ neigh = __ipv6_neigh_lookup(dev->netdev, nexthop); if (neigh) { list_for_each_entry_rcu(peer, &dev->peers, list) { if (!memcmp(neigh->ha, peer->lladdr, ETH_ALEN)) { neigh_release(neigh); rcu_read_unlock(); return peer; } } neigh_release(neigh); } rcu_read_unlock(); return NULL; } /* Return the first peer, across every device on bt_6lowpan_devices, whose channel belongs to @conn, or NULL. The RCU read lock is dropped before returning, so this function does not keep the returned peer alive. */ static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) { struct lowpan_btle_dev *entry; struct lowpan_peer *peer = NULL; rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { peer = __peer_lookup_conn(entry, conn); if (peer) break; } rcu_read_unlock(); return peer; } /* Return the device on bt_6lowpan_devices whose hdev is the HCI device of @conn, or NULL. The RCU read lock is dropped before returning. */ static struct lowpan_btle_dev *lookup_dev(struct l2cap_conn *conn) { struct lowpan_btle_dev *entry; struct lowpan_btle_dev *dev = NULL; rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { if (conn->hcon->hdev == entry->hdev) { dev = entry; break; } } rcu_read_unlock(); return dev; } /* Pass a copy of @skb to netif_rx(). Returns NET_RX_DROP when the copy cannot be allocated, otherwise netif_rx()'s result. @skb itself stays with the caller; @dev is not used. */ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *skb_cp; skb_cp = skb_copy(skb, GFP_ATOMIC); if (!skb_cp) return
NET_RX_DROP; return netif_rx(skb_cp); } /* Decompress the IPHC header of @skb, using the netdev's own address as destination link-layer address and @peer's lladdr as source. */ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct lowpan_peer *peer) { const u8 *saddr; saddr = peer->lladdr; return lowpan_header_decompress(skb, netdev, netdev->dev_addr, saddr); } /* Deliver one frame received from @peer to the stack as IPv6. A frame carrying uncompressed IPv6 has its 1-byte dispatch pulled and is copied; an IPHC frame is cloned and decompressed; anything else is dropped. Returns NET_RX_SUCCESS, or NET_RX_DROP after bumping rx_dropped. NOTE(review): the drop path does not free @skb here - presumably the caller releases it on error; confirm against the L2CAP recv contract. */ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, struct lowpan_peer *peer) { struct sk_buff *local_skb; int ret; if (!netif_running(dev)) goto drop; if (dev->type != ARPHRD_6LOWPAN || !skb->len) goto drop; skb_reset_network_header(skb); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto drop; /* check that it's our buffer */ if (lowpan_is_ipv6(*skb_network_header(skb))) { /* Pull off the 1-byte of 6lowpan header. */ skb_pull(skb, 1); /* Copy the packet so that the IPv6 header is * properly aligned. */ local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, skb_tailroom(skb), GFP_ATOMIC); if (!local_skb) goto drop; local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; local_skb->dev = dev; skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { kfree_skb(local_skb); goto drop; } dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; /* give_skb_to_upper() queued its own copy, so both local buffers are released here. */ consume_skb(local_skb); consume_skb(skb); } else if (lowpan_is_iphc(*skb_network_header(skb))) { local_skb = skb_clone(skb, GFP_ATOMIC); if (!local_skb) goto drop; local_skb->dev = dev; ret = iphc_decompress(local_skb, dev, peer); if (ret < 0) { BT_DBG("iphc_decompress failed: %d", ret); kfree_skb(local_skb); goto drop; } local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { kfree_skb(local_skb); goto drop; } dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; consume_skb(local_skb); consume_skb(skb); } else { BT_DBG("unknown packet type"); goto drop; } return NET_RX_SUCCESS; drop: dev->stats.rx_dropped++; return NET_RX_DROP; } /* Packet from BT LE device */ static int chan_recv_cb(struct l2cap_chan
*chan, struct sk_buff *skb) { struct lowpan_btle_dev *dev; struct lowpan_peer *peer; int err; peer = lookup_peer(chan->conn); if (!peer) return -ENOENT; dev = lookup_dev(chan->conn); if (!dev || !dev->netdev) return -ENOENT; err = recv_pkt(skb, dev->netdev, peer); /* Any non-zero result from recv_pkt() is reported to the caller as -EAGAIN. */ if (err) { BT_DBG("recv pkt %d", err); err = -EAGAIN; } return err; } /* Prepare @skb for transmission on @netdev: choose the destination, compress the IPv6 header and call dev_hard_header(). For a multicast destination the channel in lowpan_cb(skb) is cleared and no link-layer destination is used; for unicast the peer is looked up and @peer_addr, @peer_addr_type and lowpan_cb(skb)->chan are filled from its channel. Returns 1 for unicast, 0 for multicast, -ENOENT when no peer matches, or a negative error from dev_hard_header(). */ static int setup_header(struct sk_buff *skb, struct net_device *netdev, bdaddr_t *peer_addr, u8 *peer_addr_type) { struct in6_addr ipv6_daddr; struct ipv6hdr *hdr; struct lowpan_btle_dev *dev; struct lowpan_peer *peer; u8 *daddr; int err, status = 0; hdr = ipv6_hdr(skb); dev = lowpan_btle_dev(netdev); memcpy(&ipv6_daddr, &hdr->daddr, sizeof(ipv6_daddr)); if (ipv6_addr_is_multicast(&ipv6_daddr)) { lowpan_cb(skb)->chan = NULL; daddr = NULL; } else { BT_DBG("dest IP %pI6c", &ipv6_daddr); /* The packet might be sent to 6lowpan interface * because of routing (either via default route * or user set route) so get peer according to * the destination address. */ peer = peer_lookup_dst(dev, &ipv6_daddr, skb); if (!peer) { BT_DBG("no such peer"); return -ENOENT; } daddr = peer->lladdr; *peer_addr = peer->chan->dst; *peer_addr_type = peer->chan->dst_type; lowpan_cb(skb)->chan = peer->chan; status = 1; } lowpan_header_compress(skb, netdev, daddr, dev->netdev->dev_addr); err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0); if (err < 0) return err; return status; } /* header_ops.create hook: only checks that @type is ETH_P_IPV6 (-EINVAL otherwise); it writes nothing to @skb. */ static int header_create(struct sk_buff *skb, struct net_device *netdev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { if (type != ETH_P_IPV6) return -EINVAL; return 0; } /* Packet to BT LE device */ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, struct net_device *netdev) { struct msghdr msg; struct kvec iv; int err; /* Remember the skb so that we can send EAGAIN to the caller if * we run out of credits.
*/ chan->data = skb; iv.iov_base = skb->data; iv.iov_len = skb->len; memset(&msg, 0, sizeof(msg)); iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iv, 1, skb->len); err = l2cap_chan_send(chan, &msg, skb->len); if (err > 0) { netdev->stats.tx_bytes += err; netdev->stats.tx_packets++; return 0; } if (err < 0) netdev->stats.tx_errors++; return err; } static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) { struct sk_buff *local_skb; struct lowpan_btle_dev *entry; int err = 0; rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { struct lowpan_peer *pentry; struct lowpan_btle_dev *dev; if (entry->netdev != netdev) continue; dev = lowpan_btle_dev(entry->netdev); list_for_each_entry_rcu(pentry, &dev->peers, list) { int ret; local_skb = skb_clone(skb, GFP_ATOMIC); BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p", netdev->name, &pentry->chan->dst, pentry->chan->dst_type, &pentry->peer_addr, pentry->chan); ret = send_pkt(pentry->chan, local_skb, netdev); if (ret < 0) err = ret; kfree_skb(local_skb); } } rcu_read_unlock(); return err; } static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) { int err = 0; bdaddr_t addr; u8 addr_type; /* We must take a copy of the skb before we modify/replace the ipv6 * header as the header could be used elsewhere */ skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) return NET_XMIT_DROP; /* Return values from setup_header() * <0 - error, packet is dropped * 0 - this is a multicast packet * 1 - this is unicast packet */ err = setup_header(skb, netdev, &addr, &addr_type); if (err < 0) { kfree_skb(skb); return NET_XMIT_DROP; } if (err) { if (lowpan_cb(skb)->chan) { BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p", netdev->name, &addr, addr_type, &lowpan_cb(skb)->addr, lowpan_cb(skb)->chan); err = send_pkt(lowpan_cb(skb)->chan, skb, netdev); } else { err = -ENOENT; } } else { /* We need to send the packet to every device behind this * interface. 
*/ err = send_mcast_pkt(skb, netdev); } /* skb is released here on both the unicast and the multicast path. */ dev_kfree_skb(skb); if (err) BT_DBG("ERROR: xmit failed (%d)", err); return err < 0 ? NET_XMIT_DROP : err; } /* ndo_init hook: only sets the lockdep classes for @dev. */ static int bt_dev_init(struct net_device *dev) { netdev_lockdep_set_classes(dev); return 0; } static const struct net_device_ops netdev_ops = { .ndo_init = bt_dev_init, .ndo_start_xmit = bt_xmit, }; static const struct header_ops header_ops = { .create = header_create, }; /* Setup callback handed to alloc_netdev(): no hard header or tailroom, running multicast interface, no watchdog, default tx queue length, this file's netdev/header ops, and needs_free_netdev set. */ static void netdev_setup(struct net_device *dev) { dev->hard_header_len = 0; dev->needed_tailroom = 0; dev->flags = IFF_RUNNING | IFF_MULTICAST; dev->watchdog_timeo = 0; dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->netdev_ops = &netdev_ops; dev->header_ops = &header_ops; dev->needs_free_netdev = true; } static struct device_type bt_type = { .name = "bluetooth", }; /* Open @netdev under the RTNL lock; a failure is only logged. */ static void ifup(struct net_device *netdev) { int err; rtnl_lock(); err = dev_open(netdev, NULL); if (err < 0) BT_INFO("iface %s cannot be opened (%d)", netdev->name, err); rtnl_unlock(); } /* Close @netdev under the RTNL lock. */ static void ifdown(struct net_device *netdev) { rtnl_lock(); dev_close(netdev); rtnl_unlock(); } /* Work handler for lowpan_btle_dev.notify_peers. */ static void do_notify_peers(struct work_struct *work) { struct lowpan_btle_dev *dev = container_of(work, struct lowpan_btle_dev, notify_peers.work); netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */ } /* True only for an LE link while enable_6lowpan is set. */ static bool is_bt_6lowpan(struct hci_conn *hcon) { if (hcon->type != LE_LINK) return false; if (!enable_6lowpan) return false; return true; } /* Create an L2CAP channel with default settings, then make it connection oriented, LE flow-control mode, with an imtu of 1280. Returns NULL if l2cap_chan_create() fails. */ static struct l2cap_chan *chan_create(void) { struct l2cap_chan *chan; chan = l2cap_chan_create(); if (!chan) return NULL; l2cap_chan_set_defaults(chan); chan->chan_type = L2CAP_CHAN_CONN_ORIENTED; chan->mode = L2CAP_MODE_LE_FLOWCTL; chan->imtu = 1280; return chan; } /* Allocate a peer for @chan, derive its link-layer and IPv6 addresses from chan->dst, add it to dev->peers under devices_lock and schedule dev->notify_peers. Returns @chan (read back from the peer), or NULL when the peer cannot be allocated. */ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, struct lowpan_btle_dev *dev, bool new_netdev) { struct lowpan_peer *peer; peer = kzalloc(sizeof(*peer), GFP_ATOMIC); if (!peer) return NULL; peer->chan = chan; baswap((void *)peer->lladdr, &chan->dst);
lowpan_iphc_uncompress_eui48_lladdr(&peer->peer_addr, peer->lladdr); spin_lock(&devices_lock); INIT_LIST_HEAD(&peer->list); peer_add(dev, peer); spin_unlock(&devices_lock); /* Notifying peers about us needs to be done without locks held */ if (new_netdev) INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers); schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100)); return peer->chan; } static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) { struct net_device *netdev; bdaddr_t addr; int err; netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)), IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN, netdev_setup); if (!netdev) return -ENOMEM; netdev->addr_assign_type = NET_ADDR_PERM; baswap(&addr, &chan->src); __dev_addr_set(netdev, &addr, sizeof(addr)); netdev->netdev_ops = &netdev_ops; SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev); SET_NETDEV_DEVTYPE(netdev, &bt_type); *dev = lowpan_btle_dev(netdev); (*dev)->netdev = netdev; (*dev)->hdev = chan->conn->hcon->hdev; INIT_LIST_HEAD(&(*dev)->peers); spin_lock(&devices_lock); INIT_LIST_HEAD(&(*dev)->list); list_add_rcu(&(*dev)->list, &bt_6lowpan_devices); spin_unlock(&devices_lock); err = lowpan_register_netdev(netdev, LOWPAN_LLTYPE_BTLE); if (err < 0) { BT_INFO("register_netdev failed %d", err); spin_lock(&devices_lock); list_del_rcu(&(*dev)->list); spin_unlock(&devices_lock); free_netdev(netdev); goto out; } BT_DBG("ifindex %d peer bdaddr %pMR type %d my addr %pMR type %d", netdev->ifindex, &chan->dst, chan->dst_type, &chan->src, chan->src_type); set_bit(__LINK_STATE_PRESENT, &netdev->state); return 0; out: return err; } static inline void chan_ready_cb(struct l2cap_chan *chan) { struct lowpan_btle_dev *dev; bool new_netdev = false; dev = lookup_dev(chan->conn); BT_DBG("chan %p conn %p dev %p", chan, chan->conn, dev); if (!dev) { if (setup_netdev(chan, &dev) < 0) { l2cap_chan_del(chan, -ENOENT); return; } new_netdev = true; } if (!try_module_get(THIS_MODULE)) return; 
add_peer_chan(chan, dev, new_netdev); ifup(dev->netdev); } static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan) { struct l2cap_chan *chan; chan = chan_create(); if (!chan) return NULL; chan->ops = pchan->ops; BT_DBG("chan %p pchan %p", chan, pchan); return chan; } static void delete_netdev(struct work_struct *work) { struct lowpan_btle_dev *entry = container_of(work, struct lowpan_btle_dev, delete_netdev); lowpan_unregister_netdev(entry->netdev); /* The entry pointer is deleted by the netdev destructor. */ } static void chan_close_cb(struct l2cap_chan *chan) { struct lowpan_btle_dev *entry; struct lowpan_btle_dev *dev = NULL; struct lowpan_peer *peer; int err = -ENOENT; bool last = false, remove = true; BT_DBG("chan %p conn %p", chan, chan->conn); if (chan->conn && chan->conn->hcon) { if (!is_bt_6lowpan(chan->conn->hcon)) return; /* If conn is set, then the netdev is also there and we should * not remove it. */ remove = false; } spin_lock(&devices_lock); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { dev = lowpan_btle_dev(entry->netdev); peer = __peer_lookup_chan(dev, chan); if (peer) { last = peer_del(dev, peer); err = 0; BT_DBG("dev %p removing %speer %p", dev, last ? 
"last " : "1 ", peer); BT_DBG("chan %p orig refcnt %u", chan, kref_read(&chan->kref)); l2cap_chan_put(chan); break; } } if (!err && last && dev && !atomic_read(&dev->peer_count)) { spin_unlock(&devices_lock); cancel_delayed_work_sync(&dev->notify_peers); ifdown(dev->netdev); if (remove) { INIT_WORK(&entry->delete_netdev, delete_netdev); schedule_work(&entry->delete_netdev); } } else { spin_unlock(&devices_lock); } } static void chan_state_change_cb(struct l2cap_chan *chan, int state, int err) { BT_DBG("chan %p conn %p state %s err %d", chan, chan->conn, state_to_string(state), err); } static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan, unsigned long hdr_len, unsigned long len, int nb) { /* Note that we must allocate using GFP_ATOMIC here as * this function is called originally from netdev hard xmit * function in atomic context. */ return bt_skb_alloc(hdr_len + len, GFP_ATOMIC); } static void chan_suspend_cb(struct l2cap_chan *chan) { struct lowpan_btle_dev *dev; BT_DBG("chan %p suspend", chan); dev = lookup_dev(chan->conn); if (!dev || !dev->netdev) return; netif_stop_queue(dev->netdev); } static void chan_resume_cb(struct l2cap_chan *chan) { struct lowpan_btle_dev *dev; BT_DBG("chan %p resume", chan); dev = lookup_dev(chan->conn); if (!dev || !dev->netdev) return; netif_wake_queue(dev->netdev); } static long chan_get_sndtimeo_cb(struct l2cap_chan *chan) { return L2CAP_CONN_TIMEOUT; } static const struct l2cap_ops bt_6lowpan_chan_ops = { .name = "L2CAP 6LoWPAN channel", .new_connection = chan_new_conn_cb, .recv = chan_recv_cb, .close = chan_close_cb, .state_change = chan_state_change_cb, .ready = chan_ready_cb, .resume = chan_resume_cb, .suspend = chan_suspend_cb, .get_sndtimeo = chan_get_sndtimeo_cb, .alloc_skb = chan_alloc_skb_cb, .teardown = l2cap_chan_no_teardown, .defer = l2cap_chan_no_defer, .set_shutdown = l2cap_chan_no_set_shutdown, }; static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) { struct l2cap_chan *chan; int err; chan = 
chan_create(); if (!chan) return -EINVAL; chan->ops = &bt_6lowpan_chan_ops; err = l2cap_chan_connect(chan, cpu_to_le16(L2CAP_PSM_IPSP), 0, addr, dst_type); BT_DBG("chan %p err %d", chan, err); if (err < 0) l2cap_chan_put(chan); return err; } static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type) { struct lowpan_peer *peer; BT_DBG("conn %p dst type %u", conn, dst_type); peer = lookup_peer(conn); if (!peer) return -ENOENT; BT_DBG("peer %p chan %p", peer, peer->chan); l2cap_chan_close(peer->chan, ENOENT); return 0; } static struct l2cap_chan *bt_6lowpan_listen(void) { bdaddr_t *addr = BDADDR_ANY; struct l2cap_chan *chan; int err; if (!enable_6lowpan) return NULL; chan = chan_create(); if (!chan) return NULL; chan->ops = &bt_6lowpan_chan_ops; chan->state = BT_LISTEN; chan->src_type = BDADDR_LE_PUBLIC; atomic_set(&chan->nesting, L2CAP_NESTING_PARENT); BT_DBG("chan %p src type %u", chan, chan->src_type); err = l2cap_add_psm(chan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); if (err) { l2cap_chan_put(chan); BT_ERR("psm cannot be added err %d", err); return NULL; } return chan; } static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, struct l2cap_conn **conn) { struct hci_conn *hcon; struct hci_dev *hdev; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", &addr->b[5], &addr->b[4], &addr->b[3], &addr->b[2], &addr->b[1], &addr->b[0], addr_type); if (n < 7) return -EINVAL; /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); hci_dev_put(hdev); if (!hcon) return -ENOENT; *conn = (struct l2cap_conn *)hcon->l2cap_data; BT_DBG("conn %p dst %pMR type %u", *conn, &hcon->dst, hcon->dst_type); return 0; } static void disconnect_all_peers(void) { struct lowpan_btle_dev *entry; struct lowpan_peer *peer, *tmp_peer, *new_peer; struct list_head peers; 
INIT_LIST_HEAD(&peers); /* We make a separate list of peers as the close_cb() will * modify the device peers list so it is better not to mess * with the same list at the same time. */ rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { list_for_each_entry_rcu(peer, &entry->peers, list) { new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC); if (!new_peer) break; new_peer->chan = peer->chan; INIT_LIST_HEAD(&new_peer->list); list_add(&new_peer->list, &peers); } } rcu_read_unlock(); spin_lock(&devices_lock); list_for_each_entry_safe(peer, tmp_peer, &peers, list) { l2cap_chan_close(peer->chan, ENOENT); list_del_rcu(&peer->list); kfree_rcu(peer, rcu); } spin_unlock(&devices_lock); } struct set_enable { struct work_struct work; bool flag; }; static void do_enable_set(struct work_struct *work) { struct set_enable *set_enable = container_of(work, struct set_enable, work); if (!set_enable->flag || enable_6lowpan != set_enable->flag) /* Disconnect existing connections if 6lowpan is * disabled */ disconnect_all_peers(); enable_6lowpan = set_enable->flag; mutex_lock(&set_lock); if (listen_chan) { l2cap_chan_close(listen_chan, 0); l2cap_chan_put(listen_chan); } listen_chan = bt_6lowpan_listen(); mutex_unlock(&set_lock); kfree(set_enable); } static int lowpan_enable_set(void *data, u64 val) { struct set_enable *set_enable; set_enable = kzalloc(sizeof(*set_enable), GFP_KERNEL); if (!set_enable) return -ENOMEM; set_enable->flag = !!val; INIT_WORK(&set_enable->work, do_enable_set); schedule_work(&set_enable->work); return 0; } static int lowpan_enable_get(void *data, u64 *val) { *val = enable_6lowpan; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, lowpan_enable_set, "%llu\n"); static ssize_t lowpan_control_write(struct file *fp, const char __user *user_buffer, size_t count, loff_t *position) { char buf[32]; size_t buf_size = min(count, sizeof(buf) - 1); int ret; bdaddr_t addr; u8 addr_type; struct l2cap_conn *conn = NULL; if 
(copy_from_user(buf, user_buffer, buf_size)) return -EFAULT; buf[buf_size] = '\0'; if (memcmp(buf, "connect ", 8) == 0) { ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn); if (ret == -EINVAL) return ret; mutex_lock(&set_lock); if (listen_chan) { l2cap_chan_close(listen_chan, 0); l2cap_chan_put(listen_chan); listen_chan = NULL; } mutex_unlock(&set_lock); if (conn) { struct lowpan_peer *peer; if (!is_bt_6lowpan(conn->hcon)) return -EINVAL; peer = lookup_peer(conn); if (peer) { BT_DBG("6LoWPAN connection already exists"); return -EALREADY; } BT_DBG("conn %p dst %pMR type %d user %u", conn, &conn->hcon->dst, conn->hcon->dst_type, addr_type); } ret = bt_6lowpan_connect(&addr, addr_type); if (ret < 0) return ret; return count; } if (memcmp(buf, "disconnect ", 11) == 0) { ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn); if (ret < 0) return ret; ret = bt_6lowpan_disconnect(conn, addr_type); if (ret < 0) return ret; return count; } return count; } static int lowpan_control_show(struct seq_file *f, void *ptr) { struct lowpan_btle_dev *entry; struct lowpan_peer *peer; spin_lock(&devices_lock); list_for_each_entry(entry, &bt_6lowpan_devices, list) { list_for_each_entry(peer, &entry->peers, list) seq_printf(f, "%pMR (type %u)\n", &peer->chan->dst, peer->chan->dst_type); } spin_unlock(&devices_lock); return 0; } static int lowpan_control_open(struct inode *inode, struct file *file) { return single_open(file, lowpan_control_show, inode->i_private); } static const struct file_operations lowpan_control_fops = { .open = lowpan_control_open, .read = seq_read, .write = lowpan_control_write, .llseek = seq_lseek, .release = single_release, }; static void disconnect_devices(void) { struct lowpan_btle_dev *entry, *tmp, *new_dev; struct list_head devices; INIT_LIST_HEAD(&devices); /* We make a separate list of devices because the unregister_netdev() * will call device_event() which will also want to modify the same * devices list. 
*/ rcu_read_lock(); list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { new_dev = kmalloc(sizeof(*new_dev), GFP_ATOMIC); if (!new_dev) break; new_dev->netdev = entry->netdev; INIT_LIST_HEAD(&new_dev->list); list_add_rcu(&new_dev->list, &devices); } rcu_read_unlock(); list_for_each_entry_safe(entry, tmp, &devices, list) { ifdown(entry->netdev); BT_DBG("Unregistering netdev %s %p", entry->netdev->name, entry->netdev); lowpan_unregister_netdev(entry->netdev); kfree(entry); } } static int device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); struct lowpan_btle_dev *entry; if (netdev->type != ARPHRD_6LOWPAN) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: spin_lock(&devices_lock); list_for_each_entry(entry, &bt_6lowpan_devices, list) { if (entry->netdev == netdev) { BT_DBG("Unregistered netdev %s %p", netdev->name, netdev); list_del(&entry->list); break; } } spin_unlock(&devices_lock); break; } return NOTIFY_DONE; } static struct notifier_block bt_6lowpan_dev_notifier = { .notifier_call = device_event, }; static int __init bt_6lowpan_init(void) { lowpan_enable_debugfs = debugfs_create_file_unsafe("6lowpan_enable", 0644, bt_debugfs, NULL, &lowpan_enable_fops); lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, bt_debugfs, NULL, &lowpan_control_fops); return register_netdevice_notifier(&bt_6lowpan_dev_notifier); } static void __exit bt_6lowpan_exit(void) { debugfs_remove(lowpan_enable_debugfs); debugfs_remove(lowpan_control_debugfs); if (listen_chan) { l2cap_chan_close(listen_chan, 0); l2cap_chan_put(listen_chan); } disconnect_devices(); unregister_netdevice_notifier(&bt_6lowpan_dev_notifier); } module_init(bt_6lowpan_init); module_exit(bt_6lowpan_exit); MODULE_AUTHOR("Jukka Rissanen <jukka.rissanen@linux.intel.com>"); MODULE_DESCRIPTION("Bluetooth 6LoWPAN"); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020 Google LLC. */ #include <linux/filter.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/binfmts.h> #include <linux/lsm_hooks.h> #include <linux/bpf_lsm.h> #include <linux/kallsyms.h> #include <linux/bpf_verifier.h> #include <net/bpf_sk_storage.h> #include <linux/bpf_local_storage.h> #include <linux/btf_ids.h> #include <linux/ima.h> #include <linux/bpf-cgroup.h> /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. 
*/
/* Each LSM_HOOK() entry of lsm_hook_defs.h expands to one noinline
 * bpf_lsm_<NAME>() function that just returns DEFAULT.
 */
#define LSM_HOOK(RET, DEFAULT, NAME, ...)	\
noinline RET bpf_lsm_##NAME(__VA_ARGS__)	\
{						\
	return DEFAULT;				\
}

#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK

/* Expand the same hook list a second time, now as BTF_ID entries, to build
 * the bpf_lsm_hooks set used by bpf_lsm_verify_prog().
 */
#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
BTF_SET_START(bpf_lsm_hooks)
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
BTF_SET_END(bpf_lsm_hooks)

/* List of LSM hooks that should operate on 'current' cgroup regardless
 * of function signature.
 */
BTF_SET_START(bpf_lsm_current_hooks)
/* operate on freshly allocated sk without any cgroup association */
#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_sk_alloc_security)
BTF_ID(func, bpf_lsm_sk_free_security)
#endif
BTF_SET_END(bpf_lsm_current_hooks)

/* List of LSM hooks that trigger while the socket is properly locked.
 */
BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_sock_graft)
BTF_ID(func, bpf_lsm_inet_csk_clone)
BTF_ID(func, bpf_lsm_inet_conn_established)
#endif
BTF_SET_END(bpf_lsm_locked_sockopt_hooks)

/* List of LSM hooks that trigger while the socket is _not_ locked,
 * but it's ok to call bpf_{g,s}etsockopt because the socket is still
 * in the early init phase.
 */
BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks)
#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_socket_post_create)
BTF_ID(func, bpf_lsm_socket_socketpair)
#endif
BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)

#ifdef CONFIG_CGROUP_BPF
/* Select the cgroup shim function for @prog and store it in *@bpf_func.
 *
 * The "current" shim is used when the attach function has no parameters or
 * is listed in bpf_lsm_current_hooks. Otherwise, with CONFIG_NET, the type
 * of the first parameter is compared against
 * btf_sock_ids[BTF_SOCK_TYPE_SOCKET] and btf_sock_ids[BTF_SOCK_TYPE_SOCK]
 * to pick the socket or sock shim; everything else falls back to the
 * "current" shim.
 */
void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
			     bpf_func_t *bpf_func)
{
	const struct btf_param *args __maybe_unused;

	if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
	    btf_id_set_contains(&bpf_lsm_current_hooks,
				prog->aux->attach_btf_id)) {
		*bpf_func = __cgroup_bpf_run_lsm_current;
		return;
	}

#ifdef CONFIG_NET
	args = btf_params(prog->aux->attach_func_proto);

	if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
		*bpf_func = __cgroup_bpf_run_lsm_socket;
	else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
		*bpf_func = __cgroup_bpf_run_lsm_sock;
	else
#endif
		/* Also the only assignment when CONFIG_NET is not set. */
		*bpf_func = __cgroup_bpf_run_lsm_current;
}
#endif

/* Check that @prog may be loaded as an LSM program.
 *
 * Returns -EINVAL (with a message in @vlog) if the program is not GPL
 * compatible or if its attach_btf_id is not in bpf_lsm_hooks; 0 otherwise.
 */
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
			const struct bpf_prog *prog)
{
	if (!prog->gpl_compatible) {
		bpf_log(vlog,
			"LSM programs must have a GPL compatible license\n");
		return -EINVAL;
	}

	if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
		bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
			prog->aux->attach_btf_id, prog->aux->attach_func_name);
		return -EINVAL;
	}

	return 0;
}

/* Mask for all the currently supported BPRM option flags */
#define BPF_F_BRPM_OPTS_MASK	BPF_F_BPRM_SECUREEXEC

/* Helper: set bprm->secureexec from the BPF_F_BPRM_SECUREEXEC bit of
 * @flags. Returns -EINVAL if any bit outside the mask is set, else 0.
 */
BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags)
{
	if (flags & ~BPF_F_BRPM_OPTS_MASK)
		return -EINVAL;

	bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC);

	return 0;
}

BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm)

/* Prototype of bpf_bprm_opts_set(): arg1 is a BTF pointer to
 * struct linux_binprm, arg2 is an arbitrary value (the flags).
 */
static const struct bpf_func_proto bpf_bprm_opts_set_proto = {
	.func		= bpf_bprm_opts_set,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_bprm_opts_set_btf_ids[0],
	.arg2_type	= ARG_ANYTHING,
};

/* Helper: thin wrapper that forwards to ima_inode_hash(). */
BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size)
{
	return ima_inode_hash(inode, dst, size);
}

/* .allowed callback: permit the helper only when the program's attach hook
 * is a sleepable LSM hook.
 */
static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog)
{
	return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
}

BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)

/* Prototype of bpf_ima_inode_hash(): may sleep; arg1 is a BTF pointer to
 * struct inode, arg2/arg3 describe the uninitialised output buffer.
 */
static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
	.func		= bpf_ima_inode_hash,
	.gpl_only	= false,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_ima_inode_hash_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.allowed	= bpf_ima_inode_hash_allowed,
};

/* Helper: thin wrapper that forwards to ima_file_hash(). */
BPF_CALL_3(bpf_ima_file_hash, struct file *, file, void *, dst, u32, size)
{
	return ima_file_hash(file, dst, size);
}

BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)

/* Prototype of bpf_ima_file_hash(): same shape as the inode variant, with
 * arg1 a BTF pointer to struct file. It reuses
 * bpf_ima_inode_hash_allowed() as its .allowed callback.
 */
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
	.func		= bpf_ima_file_hash,
	.gpl_only	= false,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_ima_file_hash_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.allowed	= bpf_ima_inode_hash_allowed,
};

/* Helper: return the bpf_cookie of the bpf_trace_run_ctx that contains
 * current->bpf_ctx. The @ctx argument itself is not read.
 */
BPF_CALL_1(bpf_get_attach_cookie, void *, ctx)
{
	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}

/* Prototype of bpf_get_attach_cookie(): takes the program context. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto = {
	.func		= bpf_get_attach_cookie,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/* get_func_proto callback of lsm_verifier_ops: map @func_id to the helper
 * prototype available to LSM program @prog.
 *
 * BPF_LSM_CGROUP programs are first offered the cgroup common helpers.
 * get_attach_cookie is only returned for programs with a trampoline.
 * {g,s}etsockopt are only returned (under CONFIG_NET) for BPF_LSM_CGROUP
 * programs attached to a hook in the locked or unlocked sockopt sets, and
 * the matching locked/unlocked prototype is chosen. Ids not handled here
 * are delegated to tracing_prog_func_proto().
 *
 * Returns the prototype, or NULL when the helper is not permitted.
 */
static const struct bpf_func_proto *
bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	if (prog->expected_attach_type == BPF_LSM_CGROUP) {
		func_proto = cgroup_common_func_proto(func_id, prog);
		if (func_proto)
			return func_proto;
	}

	switch (func_id) {
	case BPF_FUNC_inode_storage_get:
		return &bpf_inode_storage_get_proto;
	case BPF_FUNC_inode_storage_delete:
		return &bpf_inode_storage_delete_proto;
#ifdef CONFIG_NET
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_proto;
#endif /* CONFIG_NET */
	case BPF_FUNC_spin_lock:
		return &bpf_spin_lock_proto;
	case BPF_FUNC_spin_unlock:
		return &bpf_spin_unlock_proto;
	case BPF_FUNC_bprm_opts_set:
		return &bpf_bprm_opts_set_proto;
	case BPF_FUNC_ima_inode_hash:
		return &bpf_ima_inode_hash_proto;
	case BPF_FUNC_ima_file_hash:
		return &bpf_ima_file_hash_proto;
	case BPF_FUNC_get_attach_cookie:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
#ifdef CONFIG_NET
	case BPF_FUNC_setsockopt:
		if (prog->expected_attach_type != BPF_LSM_CGROUP)
			return NULL;
		if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
					prog->aux->attach_btf_id))
			return &bpf_sk_setsockopt_proto;
		if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
					prog->aux->attach_btf_id))
			return &bpf_unlocked_sk_setsockopt_proto;
		return NULL;
	case BPF_FUNC_getsockopt:
		if (prog->expected_attach_type != BPF_LSM_CGROUP)
			return NULL;
		if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
					prog->aux->attach_btf_id))
			return &bpf_sk_getsockopt_proto;
		if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
					prog->aux->attach_btf_id))
			return &bpf_unlocked_sk_getsockopt_proto;
		return NULL;
#endif
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

/* The set of hooks which are called without pagefaults disabled and are allowed
 * to "sleep" and thus can be used for sleepable BPF programs.
 */
BTF_SET_START(sleepable_lsm_hooks)
BTF_ID(func, bpf_lsm_bpf)
BTF_ID(func, bpf_lsm_bpf_map)
BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
BTF_ID(func, bpf_lsm_bpf_map_free_security)
BTF_ID(func, bpf_lsm_bpf_prog)
BTF_ID(func, bpf_lsm_bprm_check_security)
BTF_ID(func, bpf_lsm_bprm_committed_creds)
BTF_ID(func, bpf_lsm_bprm_committing_creds)
BTF_ID(func, bpf_lsm_bprm_creds_for_exec)
BTF_ID(func, bpf_lsm_bprm_creds_from_file)
BTF_ID(func, bpf_lsm_capget)
BTF_ID(func, bpf_lsm_capset)
BTF_ID(func, bpf_lsm_cred_prepare)
BTF_ID(func, bpf_lsm_file_ioctl)
BTF_ID(func, bpf_lsm_file_lock)
BTF_ID(func, bpf_lsm_file_open)
BTF_ID(func, bpf_lsm_file_receive)

#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_inet_conn_established)
#endif /* CONFIG_SECURITY_NETWORK */

BTF_ID(func, bpf_lsm_inode_create)
BTF_ID(func, bpf_lsm_inode_free_security)
BTF_ID(func, bpf_lsm_inode_getattr)
BTF_ID(func, bpf_lsm_inode_getxattr)
BTF_ID(func, bpf_lsm_inode_mknod)
BTF_ID(func, bpf_lsm_inode_need_killpriv)
BTF_ID(func, bpf_lsm_inode_post_setxattr)
BTF_ID(func, bpf_lsm_inode_readlink)
BTF_ID(func, bpf_lsm_inode_rename)
BTF_ID(func, bpf_lsm_inode_rmdir)
BTF_ID(func, bpf_lsm_inode_setattr)
BTF_ID(func, bpf_lsm_inode_setxattr)
BTF_ID(func, bpf_lsm_inode_symlink)
BTF_ID(func, bpf_lsm_inode_unlink)
BTF_ID(func, bpf_lsm_kernel_module_request)
BTF_ID(func, bpf_lsm_kernel_read_file)
BTF_ID(func, bpf_lsm_kernfs_init_security)

#ifdef CONFIG_KEYS
BTF_ID(func, bpf_lsm_key_free)
#endif /* CONFIG_KEYS */

BTF_ID(func, bpf_lsm_mmap_file)
BTF_ID(func, bpf_lsm_netlink_send)
BTF_ID(func, bpf_lsm_path_notify)
BTF_ID(func, bpf_lsm_release_secctx)
BTF_ID(func, bpf_lsm_sb_alloc_security)
BTF_ID(func, bpf_lsm_sb_eat_lsm_opts)
BTF_ID(func, bpf_lsm_sb_kern_mount)
BTF_ID(func, bpf_lsm_sb_mount)
BTF_ID(func, bpf_lsm_sb_remount)
BTF_ID(func, bpf_lsm_sb_set_mnt_opts)
BTF_ID(func, bpf_lsm_sb_show_options)
BTF_ID(func, bpf_lsm_sb_statfs)
BTF_ID(func, bpf_lsm_sb_umount)
BTF_ID(func, bpf_lsm_settime)

#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_socket_accept)
BTF_ID(func, bpf_lsm_socket_bind)
BTF_ID(func, bpf_lsm_socket_connect)
BTF_ID(func, bpf_lsm_socket_create)
BTF_ID(func, bpf_lsm_socket_getpeername)
BTF_ID(func, bpf_lsm_socket_getpeersec_dgram)
BTF_ID(func, bpf_lsm_socket_getsockname)
BTF_ID(func, bpf_lsm_socket_getsockopt)
BTF_ID(func, bpf_lsm_socket_listen)
BTF_ID(func, bpf_lsm_socket_post_create)
BTF_ID(func, bpf_lsm_socket_recvmsg)
BTF_ID(func, bpf_lsm_socket_sendmsg)
BTF_ID(func, bpf_lsm_socket_shutdown)
BTF_ID(func, bpf_lsm_socket_socketpair)
#endif /* CONFIG_SECURITY_NETWORK */

BTF_ID(func, bpf_lsm_syslog)
BTF_ID(func, bpf_lsm_task_alloc)
BTF_ID(func, bpf_lsm_current_getsecid_subj)
BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)
BTF_ID(func, bpf_lsm_task_to_inode)
BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)

/* Hooks for which bpf_lsm_is_trusted() returns false. */
BTF_SET_START(untrusted_lsm_hooks)
BTF_ID(func, bpf_lsm_bpf_map_free_security)
BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
BTF_ID(func, bpf_lsm_bpf_prog_free_security)
BTF_ID(func, bpf_lsm_file_alloc_security)
BTF_ID(func, bpf_lsm_file_free_security)
#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_sk_alloc_security)
BTF_ID(func, bpf_lsm_sk_free_security)
#endif /* CONFIG_SECURITY_NETWORK */
BTF_ID(func, bpf_lsm_task_free)
BTF_SET_END(untrusted_lsm_hooks)

/* Return true if @btf_id is a member of sleepable_lsm_hooks. */
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
{
	return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
}

/* Return true unless the attach hook of @prog is in untrusted_lsm_hooks. */
bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
{
	return !btf_id_set_contains(&untrusted_lsm_hooks, prog->aux->attach_btf_id);
}

/* Program ops for LSM programs; no callbacks are set. */
const struct bpf_prog_ops lsm_prog_ops = {
};

/* Verifier ops for LSM programs: helper lookup via bpf_lsm_func_proto(),
 * context access checks via btf_ctx_access().
 */
const struct bpf_verifier_ops lsm_verifier_ops = {
	.get_func_proto = bpf_lsm_func_proto,
	.is_valid_access = btf_ctx_access,
};
|
3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | // SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match FRAG parameters. */ /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_frag.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: IPv6 fragment match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); /* Returns 1 if the id is matched by the range, 0 otherwise */ static inline bool id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { bool r; pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; pr_debug(" result %s\n", r ? 
		 "PASS" : "FAILED");
	return r;
}

/* Match callback for the "frag" match.
 *
 * Locates the fragment header with ipv6_find_hdr(). If it is absent
 * (-ENOENT) the packet simply does not match; any other lookup error, or a
 * header that cannot be read with skb_header_pointer(), additionally sets
 * par->hotdrop.
 *
 * The packet matches when all of the following hold:
 *  - the identification is within ids[0]..ids[1] (result inverted when
 *    IP6T_FRAG_INV_IDS is set in invflags);
 *  - IP6T_FRAG_RES: the reserved field and bits 0x6 of frag_off are zero;
 *  - IP6T_FRAG_FST: frag_off has no bits set outside the low three;
 *  - IP6T_FRAG_MF:  IP6_MF is set in frag_off;
 *  - IP6T_FRAG_NMF: IP6_MF is clear in frag_off.
 * A condition is only enforced when its flag is set in fraginfo->flags.
 *
 * The pr_debug() calls print the header fields and each partial result.
 */
static bool
frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
{
	struct frag_hdr _frag;
	const struct frag_hdr *fh;
	const struct ip6t_frag *fraginfo = par->matchinfo;
	unsigned int ptr = 0;
	int err;

	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL);
	if (err < 0) {
		if (err != -ENOENT)
			par->hotdrop = true;
		return false;
	}

	/* _frag is the on-stack buffer handed to skb_header_pointer(). */
	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
	if (fh == NULL) {
		par->hotdrop = true;
		return false;
	}

	pr_debug("INFO %04X ", fh->frag_off);
	pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
	pr_debug("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
	pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF));
	pr_debug("ID %u %08X\n", ntohl(fh->identification),
		 ntohl(fh->identification));

	pr_debug("IPv6 FRAG id %02X ",
		 id_match(fraginfo->ids[0], fraginfo->ids[1],
			  ntohl(fh->identification),
			  !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)));
	pr_debug("res %02X %02X%04X %02X ",
		 fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
		 ntohs(fh->frag_off) & 0x6,
		 !((fraginfo->flags & IP6T_FRAG_RES) &&
		   (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
	pr_debug("first %02X %02X %02X ",
		 fraginfo->flags & IP6T_FRAG_FST,
		 ntohs(fh->frag_off) & ~0x7,
		 !((fraginfo->flags & IP6T_FRAG_FST) &&
		   (ntohs(fh->frag_off) & ~0x7)));
	pr_debug("mf %02X %02X %02X ",
		 fraginfo->flags & IP6T_FRAG_MF,
		 ntohs(fh->frag_off) & IP6_MF,
		 !((fraginfo->flags & IP6T_FRAG_MF) &&
		   !((ntohs(fh->frag_off) & IP6_MF))));
	pr_debug("last %02X %02X %02X\n",
		 fraginfo->flags & IP6T_FRAG_NMF,
		 ntohs(fh->frag_off) & IP6_MF,
		 !((fraginfo->flags & IP6T_FRAG_NMF) &&
		   (ntohs(fh->frag_off) & IP6_MF)));

	return id_match(fraginfo->ids[0], fraginfo->ids[1],
			 ntohl(fh->identification),
			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) &&
		!((fraginfo->flags & IP6T_FRAG_RES) &&
		  (fh->reserved || (ntohs(fh->frag_off) & 0x6))) &&
		!((fraginfo->flags & IP6T_FRAG_FST) &&
		  (ntohs(fh->frag_off) & ~0x7)) &&
		!((fraginfo->flags & IP6T_FRAG_MF) &&
		  !(ntohs(fh->frag_off) & IP6_MF)) &&
		!((fraginfo->flags & IP6T_FRAG_NMF) &&
		  (ntohs(fh->frag_off) & IP6_MF));
}

/* checkentry callback: reject rules whose invflags contain bits outside
 * IP6T_FRAG_INV_MASK. Returns -EINVAL in that case, 0 otherwise.
 */
static int frag_mt6_check(const struct xt_mtchk_param *par)
{
	const struct ip6t_frag *fraginfo = par->matchinfo;

	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
		pr_debug("unknown flags %X\n", fraginfo->invflags);
		return -EINVAL;
	}
	return 0;
}

/* Registration record of the IPv6 "frag" match. */
static struct xt_match frag_mt6_reg __read_mostly = {
	.name		= "frag",
	.family		= NFPROTO_IPV6,
	.match		= frag_mt6,
	.matchsize	= sizeof(struct ip6t_frag),
	.checkentry	= frag_mt6_check,
	.me		= THIS_MODULE,
};

/* Module init: register the match; returns xt_register_match()'s result. */
static int __init frag_mt6_init(void)
{
	return xt_register_match(&frag_mt6_reg);
}

/* Module exit: unregister the match. */
static void __exit frag_mt6_exit(void)
{
	xt_unregister_match(&frag_mt6_reg);
}

module_init(frag_mt6_init);
module_exit(frag_mt6_exit);
|
15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_GENERIC_PGALLOC_H #define __ASM_GENERIC_PGALLOC_H #ifdef CONFIG_MMU #define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO) #define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) /** * __pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table * @mm: the mm_struct of the current context * * This function is intended for architectures that need * anything beyond simple page allocation. 
*
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
{
	/* __GFP_HIGHMEM is masked out of the allocation flags; order is 0. */
	struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL &
			~__GFP_HIGHMEM, 0);

	if (!ptdesc)
		return NULL;
	return ptdesc_address(ptdesc);
}

#ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
/**
 * pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table
 * @mm: the mm_struct of the current context
 *
 * Generic version, compiled only when the architecture does not define
 * __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL; it forwards to __pte_alloc_one_kernel().
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
	return __pte_alloc_one_kernel(mm);
}
#endif

/**
 * pte_free_kernel - free PTE-level kernel page table memory
 * @mm: the mm_struct of the current context
 * @pte: pointer to the memory containing the page table
 */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
	pagetable_free(virt_to_ptdesc(pte));
}

/**
 * __pte_alloc_one - allocate memory for a PTE-level user page table
 * @mm: the mm_struct of the current context
 * @gfp: GFP flags to use for the allocation
 *
 * Allocate memory for a page table and ptdesc and runs pagetable_pte_ctor().
 * If the constructor fails, the allocation is released again.
 *
 * This function is intended for architectures that need
 * anything beyond simple page allocation or must have custom GFP flags.
 *
 * Return: `struct page` referencing the ptdesc or %NULL on error
 */
static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
{
	struct ptdesc *ptdesc;

	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pte_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}

	return ptdesc_page(ptdesc);
}

#ifndef __HAVE_ARCH_PTE_ALLOC_ONE
/**
 * pte_alloc_one - allocate a page for PTE-level user page table
 * @mm: the mm_struct of the current context
 *
 * Allocate memory for a page table and ptdesc and runs pagetable_pte_ctor().
 * Uses %GFP_PGTABLE_USER for the allocation.
 *
 * Return: `struct page` referencing the ptdesc or %NULL on error
 */
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
	return __pte_alloc_one(mm, GFP_PGTABLE_USER);
}
#endif

/*
 * Should really implement gc for free page table pages. This could be
 * done with a reference count in struct page.
 */

/**
 * pte_free - free PTE-level user page table memory
 * @mm: the mm_struct of the current context
 * @pte_page: the `struct page` referencing the ptdesc
 *
 * Runs pagetable_pte_dtor() before releasing the memory.
 */
static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
{
	struct ptdesc *ptdesc = page_ptdesc(pte_page);

	pagetable_pte_dtor(ptdesc);
	pagetable_free(ptdesc);
}


#if CONFIG_PGTABLE_LEVELS > 2

#ifndef __HAVE_ARCH_PMD_ALLOC_ONE
/**
 * pmd_alloc_one - allocate memory for a PMD-level page table
 * @mm: the mm_struct of the current context
 * @addr: not used by this generic implementation
 *
 * Allocate memory for a page table and ptdesc and runs pagetable_pmd_ctor().
 * If the constructor fails, the allocation is released again.
 *
 * Allocations use %GFP_PGTABLE_USER in user context and
 * %GFP_PGTABLE_KERNEL in kernel context (that is, when @mm is &init_mm).
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	struct ptdesc *ptdesc;
	gfp_t gfp = GFP_PGTABLE_USER;

	if (mm == &init_mm)
		gfp = GFP_PGTABLE_KERNEL;
	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pmd_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}
	return ptdesc_address(ptdesc);
}
#endif

#ifndef __HAVE_ARCH_PMD_FREE
/**
 * pmd_free - free PMD-level page table memory
 * @mm: not used by this generic implementation
 * @pmd: pointer to the PMD table; must be PAGE_SIZE aligned (BUG_ON otherwise)
 *
 * Runs pagetable_pmd_dtor() before releasing the memory.
 */
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);

	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
	pagetable_pmd_dtor(ptdesc);
	pagetable_free(ptdesc);
}
#endif

#endif /* CONFIG_PGTABLE_LEVELS > 2 */

#if CONFIG_PGTABLE_LEVELS > 3

/*
 * __pud_alloc_one - allocate memory for a PUD-level page table.
 *
 * Uses GFP_PGTABLE_KERNEL when @mm is &init_mm and GFP_PGTABLE_USER
 * otherwise, with __GFP_HIGHMEM masked out in both cases. @addr is not
 * used. pagetable_pud_ctor() is called without a result check.
 *
 * Returns a pointer to the allocated memory, or NULL if the allocation
 * fails.
 */
static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	gfp_t gfp = GFP_PGTABLE_USER;
	struct ptdesc *ptdesc;

	if (mm == &init_mm)
		gfp = GFP_PGTABLE_KERNEL;
	gfp &= ~__GFP_HIGHMEM;

	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;

	pagetable_pud_ctor(ptdesc);
	return ptdesc_address(ptdesc);
}

#ifndef __HAVE_ARCH_PUD_ALLOC_ONE
/**
 * pud_alloc_one - allocate memory for a PUD-level page table
 * @mm: the mm_struct of the current context
 * @addr: passed through to __pud_alloc_one()
 *
 * Allocate memory for a page table using %GFP_PGTABLE_USER for user context
 * and %GFP_PGTABLE_KERNEL for kernel context.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	return __pud_alloc_one(mm, addr);
}
#endif

/*
 * __pud_free - free PUD-level page table memory.
 *
 * @pud must be PAGE_SIZE aligned (BUG_ON otherwise). Runs
 * pagetable_pud_dtor() before releasing the memory. @mm is not used.
 */
static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pud);

	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
	pagetable_pud_dtor(ptdesc);
	pagetable_free(ptdesc);
}

#ifndef __HAVE_ARCH_PUD_FREE
/* Generic pud_free(): forwards to __pud_free(). */
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
	__pud_free(mm, pud);
}
#endif

#endif /* CONFIG_PGTABLE_LEVELS > 3 */

#ifndef __HAVE_ARCH_PGD_FREE
/* Generic pgd_free(): release the page table memory behind @pgd; no
 * destructor is run and @mm is not used.
 */
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pagetable_free(virt_to_ptdesc(pgd));
}
#endif

#endif /* CONFIG_MMU */

#endif /* __ASM_GENERIC_PGALLOC_H */
|
2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> * (C) 2011 Intra2net AG <https://www.intra2net.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nfnetlink_acct.h> #include <linux/netfilter/xt_nfacct.h> MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_nfacct"); MODULE_ALIAS("ip6t_nfacct"); static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) { int overquota; const struct xt_nfacct_match_info *info = par->targinfo; nfnl_acct_update(skb, info->nfacct); overquota = nfnl_acct_overquota(xt_net(par), info->nfacct); return overquota != NFACCT_UNDERQUOTA; } static int nfacct_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_nfacct_match_info *info = par->matchinfo; struct nf_acct *nfacct; nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { pr_info_ratelimited("accounting object `%s' does not exists\n", info->name); return -ENOENT; } info->nfacct = nfacct; return 0; } static void nfacct_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_nfacct_match_info *info = par->matchinfo; nfnl_acct_put(info->nfacct); } static struct xt_match nfacct_mt_reg[] __read_mostly = { { .name = "nfacct", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = nfacct_mt_checkentry, .match = nfacct_mt, .destroy = nfacct_mt_destroy, .matchsize = sizeof(struct xt_nfacct_match_info), .usersize = offsetof(struct xt_nfacct_match_info, nfacct), .me = THIS_MODULE, }, { 
.name = "nfacct", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = nfacct_mt_checkentry, .match = nfacct_mt, .destroy = nfacct_mt_destroy, .matchsize = sizeof(struct xt_nfacct_match_info_v1), .usersize = offsetof(struct xt_nfacct_match_info_v1, nfacct), .me = THIS_MODULE, }, }; static int __init nfacct_mt_init(void) { return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } static void __exit nfacct_mt_exit(void) { xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } module_init(nfacct_mt_init); module_exit(nfacct_mt_exit); |
43 1 42 33 6 3 28 128 127 1 21 99 10 10 1 2 2 1 1 1 1 1 1 34 12 3 7 3 1 1 3 18 2 9 7 1 7 1 6 1 7 1 6 2 7 1 1 1 1 1 1 1 1 7 7 19 2 10 5 2 6 1 7 6 1 6 1 7 4 57 40 17 16 20 118 3 91 107 108 107 65 37 33 47 20 37 29 59 3 4 58 7 64 58 8 53 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 
968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Nicira, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/netfilter_ipv4.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/static_key.h> #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> #include <net/ip6_checksum.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/dst_metadata.h> #include <net/geneve.h> #include <net/vxlan.h> #include <net/erspan.h> const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; EXPORT_SYMBOL(iptun_encaps); const struct ip6_tnl_encap_ops 
__rcu * ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; EXPORT_SYMBOL(ip6tun_encaps); void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; struct iphdr *iph; int err; skb_scrub_packet(skb, xnet); skb_clear_hash_if_not_l4(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out(net, sk, skb); if (dev) { if (unlikely(net_xmit_eval(err))) pkt_len = 0; iptunnel_xmit_stats(dev, pkt_len); } } EXPORT_SYMBOL_GPL(iptunnel_xmit); int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool raw_proto, bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; skb_pull_rcsum(skb, hdr_len); if (!raw_proto && inner_proto == htons(ETH_P_TEB)) { struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) return -ENOMEM; eh = (struct ethhdr *)skb->data; if (likely(eth_proto_is_802_3(eh->h_proto))) skb->protocol = eh->h_proto; else skb->protocol = htons(ETH_P_802_2); } else { skb->protocol = inner_proto; } skb_clear_hash_if_not_l4(skb); __vlan_hwaccel_clear_tag(skb); skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, xnet); return iptunnel_pull_offloads(skb); } EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) { struct metadata_dst *res; struct ip_tunnel_info *dst, *src; if (!md || md->type != METADATA_IP_TUNNEL || 
md->u.tun_info.mode & IP_TUNNEL_INFO_TX) return NULL; src = &md->u.tun_info; res = metadata_dst_alloc(src->options_len, METADATA_IP_TUNNEL, flags); if (!res) return NULL; dst = &res->u.tun_info; dst->key.tun_id = src->key.tun_id; if (src->mode & IP_TUNNEL_INFO_IPV6) memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src, sizeof(struct in6_addr)); else dst->key.u.ipv4.dst = src->key.u.ipv4.src; dst->key.tun_flags = src->key.tun_flags; dst->mode = src->mode | IP_TUNNEL_INFO_TX; ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src), src->options_len, 0); return res; } EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask) { int err; if (likely(!skb->encapsulation)) { skb_reset_inner_headers(skb); skb->encapsulation = 1; } if (skb_is_gso(skb)) { err = skb_header_unclone(skb, GFP_ATOMIC); if (unlikely(err)) return err; skb_shinfo(skb)->gso_type |= gso_type_mask; return 0; } if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_NONE; /* We clear encapsulation here to prevent badly-written * drivers potentially deciding to offload an inner checksum * if we set CHECKSUM_PARTIAL on the outer header. * This should go away when the drivers are all fixed. */ skb->encapsulation = 0; } return 0; } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); /** * iptunnel_pmtud_build_icmp() - Build ICMP error message for PMTUD * @skb: Original packet with L2 header * @mtu: MTU value for ICMP error * * Return: length on success, negative error code if message couldn't be built. 
*/ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) { const struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph; struct iphdr *niph; struct ethhdr eh; int len, err; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) return -EINVAL; skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); skb_reset_network_header(skb); err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph)); if (err) return err; len = skb->len + sizeof(*icmph); err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN); if (err) return err; icmph = skb_push(skb, sizeof(*icmph)); *icmph = (struct icmphdr) { .type = ICMP_DEST_UNREACH, .code = ICMP_FRAG_NEEDED, .checksum = 0, .un.frag.__unused = 0, .un.frag.mtu = htons(mtu), }; icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0)); skb_reset_transport_header(skb); niph = skb_push(skb, sizeof(*niph)); *niph = (struct iphdr) { .ihl = sizeof(*niph) / 4u, .version = 4, .tos = 0, .tot_len = htons(len + sizeof(*niph)), .id = 0, .frag_off = htons(IP_DF), .ttl = iph->ttl, .protocol = IPPROTO_ICMP, .saddr = iph->daddr, .daddr = iph->saddr, }; ip_send_check(niph); skb_reset_network_header(skb); skb->ip_summed = CHECKSUM_NONE; eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0); skb_reset_mac_header(skb); return skb->len; } /** * iptunnel_pmtud_check_icmp() - Trigger ICMP reply if needed and allowed * @skb: Buffer being sent by encapsulation, L2 headers expected * @mtu: Network MTU for path * * Return: 0 for no ICMP reply, length if built, negative value on error. 
*/ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) { const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); if (mtu < 576 || iph->frag_off != htons(IP_DF)) return 0; if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) || ipv4_is_zeronet(iph->saddr) || ipv4_is_loopback(iph->saddr) || ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr)) return 0; if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type)) return 0; return iptunnel_pmtud_build_icmp(skb, mtu); } #if IS_ENABLED(CONFIG_IPV6) /** * iptunnel_pmtud_build_icmpv6() - Build ICMPv6 error message for PMTUD * @skb: Original packet with L2 header * @mtu: MTU value for ICMPv6 error * * Return: length on success, negative error code if message couldn't be built. */ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct icmp6hdr *icmp6h; struct ipv6hdr *nip6h; struct ethhdr eh; int len, err; __wsum csum; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) return -EINVAL; skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); skb_reset_network_header(skb); err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h)); if (err) return err; len = skb->len + sizeof(*icmp6h); err = skb_cow(skb, sizeof(*nip6h) + sizeof(*icmp6h) + ETH_HLEN); if (err) return err; icmp6h = skb_push(skb, sizeof(*icmp6h)); *icmp6h = (struct icmp6hdr) { .icmp6_type = ICMPV6_PKT_TOOBIG, .icmp6_code = 0, .icmp6_cksum = 0, .icmp6_mtu = htonl(mtu), }; skb_reset_transport_header(skb); nip6h = skb_push(skb, sizeof(*nip6h)); *nip6h = (struct ipv6hdr) { .priority = 0, .version = 6, .flow_lbl = { 0 }, .payload_len = htons(len), .nexthdr = IPPROTO_ICMPV6, .hop_limit = ip6h->hop_limit, .saddr = ip6h->daddr, .daddr = ip6h->saddr, }; skb_reset_network_header(skb); csum = csum_partial(icmp6h, len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, 
IPPROTO_ICMPV6, csum); skb->ip_summed = CHECKSUM_NONE; eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0); skb_reset_mac_header(skb); return skb->len; } /** * iptunnel_pmtud_check_icmpv6() - Trigger ICMPv6 reply if needed and allowed * @skb: Buffer being sent by encapsulation, L2 headers expected * @mtu: Network MTU for path * * Return: 0 for no ICMPv6 reply, length if built, negative value on error. */ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); int stype = ipv6_addr_type(&ip6h->saddr); u8 proto = ip6h->nexthdr; __be16 frag_off; int offset; if (mtu < IPV6_MIN_MTU) return 0; if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST || stype == IPV6_ADDR_LOOPBACK) return 0; offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto, &frag_off); if (offset < 0 || (frag_off & htons(~0x7))) return 0; if (proto == IPPROTO_ICMPV6) { struct icmp6hdr *icmp6h; if (!pskb_may_pull(skb, skb_network_header(skb) + offset + 1 - skb->data)) return 0; icmp6h = (struct icmp6hdr *)(skb_network_header(skb) + offset); if (icmpv6_is_err(icmp6h->icmp6_type) || icmp6h->icmp6_type == NDISC_REDIRECT) return 0; } return iptunnel_pmtud_build_icmpv6(skb, mtu); } #endif /* IS_ENABLED(CONFIG_IPV6) */ /** * skb_tunnel_check_pmtu() - Check, update PMTU and trigger ICMP reply as needed * @skb: Buffer being sent by encapsulation, L2 headers expected * @encap_dst: Destination for tunnel encapsulation (outer IP) * @headroom: Encapsulation header size, bytes * @reply: Build matching ICMP or ICMPv6 message as a result * * L2 tunnel implementations that can carry IP and can be directly bridged * (currently UDP tunnels) can't always rely on IP forwarding paths to handle * PMTU discovery. In the bridged case, ICMP or ICMPv6 messages need to be built * based on payload and sent back by the encapsulation itself. * * For routable interfaces, we just need to update the PMTU for the destination. 
* * Return: 0 if ICMP error not needed, length if built, negative value on error */ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply) { u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); if (!reply || skb->pkt_type == PACKET_HOST) return 0; if (skb->protocol == htons(ETH_P_IP)) return iptunnel_pmtud_check_icmp(skb, mtu); #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) return iptunnel_pmtud_check_icmpv6(skb, mtu); #endif return 0; } EXPORT_SYMBOL(skb_tunnel_check_pmtu); static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { [LWTUNNEL_IP_UNSPEC] = { .strict_start_type = LWTUNNEL_IP_OPTS }, [LWTUNNEL_IP_ID] = { .type = NLA_U64 }, [LWTUNNEL_IP_DST] = { .type = NLA_U32 }, [LWTUNNEL_IP_SRC] = { .type = NLA_U32 }, [LWTUNNEL_IP_TTL] = { .type = NLA_U8 }, [LWTUNNEL_IP_TOS] = { .type = NLA_U8 }, [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, [LWTUNNEL_IP_OPTS] = { .type = NLA_NESTED }, }; static const struct nla_policy ip_opts_policy[LWTUNNEL_IP_OPTS_MAX + 1] = { [LWTUNNEL_IP_OPTS_GENEVE] = { .type = NLA_NESTED }, [LWTUNNEL_IP_OPTS_VXLAN] = { .type = NLA_NESTED }, [LWTUNNEL_IP_OPTS_ERSPAN] = { .type = NLA_NESTED }, }; static const struct nla_policy geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = { [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, }; static const struct nla_policy vxlan_opt_policy[LWTUNNEL_IP_OPT_VXLAN_MAX + 1] = { [LWTUNNEL_IP_OPT_VXLAN_GBP] = { .type = NLA_U32 }, }; static const struct nla_policy erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = { [LWTUNNEL_IP_OPT_ERSPAN_VER] = { .type = NLA_U8 }, [LWTUNNEL_IP_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, 
[LWTUNNEL_IP_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, [LWTUNNEL_IP_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, }; static int ip_tun_parse_opts_geneve(struct nlattr *attr, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1]; int data_len, err; err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_GENEVE_MAX, attr, geneve_opt_policy, extack); if (err) return err; if (!tb[LWTUNNEL_IP_OPT_GENEVE_CLASS] || !tb[LWTUNNEL_IP_OPT_GENEVE_TYPE] || !tb[LWTUNNEL_IP_OPT_GENEVE_DATA]) return -EINVAL; attr = tb[LWTUNNEL_IP_OPT_GENEVE_DATA]; data_len = nla_len(attr); if (data_len % 4) return -EINVAL; if (info) { struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len; memcpy(opt->opt_data, nla_data(attr), data_len); opt->length = data_len / 4; attr = tb[LWTUNNEL_IP_OPT_GENEVE_CLASS]; opt->opt_class = nla_get_be16(attr); attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE]; opt->type = nla_get_u8(attr); info->key.tun_flags |= TUNNEL_GENEVE_OPT; } return sizeof(struct geneve_opt) + data_len; } static int ip_tun_parse_opts_vxlan(struct nlattr *attr, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1]; int err; err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_VXLAN_MAX, attr, vxlan_opt_policy, extack); if (err) return err; if (!tb[LWTUNNEL_IP_OPT_VXLAN_GBP]) return -EINVAL; if (info) { struct vxlan_metadata *md = ip_tunnel_info_opts(info) + opts_len; attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); md->gbp &= VXLAN_GBP_MASK; info->key.tun_flags |= TUNNEL_VXLAN_OPT; } return sizeof(struct vxlan_metadata); } static int ip_tun_parse_opts_erspan(struct nlattr *attr, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1]; int err; u8 ver; err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_ERSPAN_MAX, attr, erspan_opt_policy, extack); if (err) return err; if (!tb[LWTUNNEL_IP_OPT_ERSPAN_VER]) 
return -EINVAL; ver = nla_get_u8(tb[LWTUNNEL_IP_OPT_ERSPAN_VER]); if (ver == 1) { if (!tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]) return -EINVAL; } else if (ver == 2) { if (!tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] || !tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]) return -EINVAL; } else { return -EINVAL; } if (info) { struct erspan_metadata *md = ip_tunnel_info_opts(info) + opts_len; md->version = ver; if (ver == 1) { attr = tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]; md->u.index = nla_get_be32(attr); } else { attr = tb[LWTUNNEL_IP_OPT_ERSPAN_DIR]; md->u.md2.dir = nla_get_u8(attr); attr = tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]; set_hwid(&md->u.md2, nla_get_u8(attr)); } info->key.tun_flags |= TUNNEL_ERSPAN_OPT; } return sizeof(struct erspan_metadata); } static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, struct netlink_ext_ack *extack) { int err, rem, opt_len, opts_len = 0; struct nlattr *nla; __be16 type = 0; if (!attr) return 0; err = nla_validate(nla_data(attr), nla_len(attr), LWTUNNEL_IP_OPTS_MAX, ip_opts_policy, extack); if (err) return err; nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { switch (nla_type(nla)) { case LWTUNNEL_IP_OPTS_GENEVE: if (type && type != TUNNEL_GENEVE_OPT) return -EINVAL; opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; if (opts_len > IP_TUNNEL_OPTS_MAX) return -EINVAL; type = TUNNEL_GENEVE_OPT; break; case LWTUNNEL_IP_OPTS_VXLAN: if (type) return -EINVAL; opt_len = ip_tun_parse_opts_vxlan(nla, info, opts_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; type = TUNNEL_VXLAN_OPT; break; case LWTUNNEL_IP_OPTS_ERSPAN: if (type) return -EINVAL; opt_len = ip_tun_parse_opts_erspan(nla, info, opts_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; type = TUNNEL_ERSPAN_OPT; break; default: return -EINVAL; } } return opts_len; } static int ip_tun_get_optlen(struct nlattr *attr, struct netlink_ext_ack *extack) { return ip_tun_parse_opts(attr, NULL, 
extack); } static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info, struct netlink_ext_ack *extack) { return ip_tun_parse_opts(attr, info, extack); } static int ip_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; struct lwtunnel_state *new_state; struct ip_tunnel_info *tun_info; int err, opt_len; err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, extack); if (err < 0) return err; opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP_OPTS], extack); if (opt_len < 0) return opt_len; new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len); if (!new_state) return -ENOMEM; new_state->type = LWTUNNEL_ENCAP_IP; tun_info = lwt_tun_info(new_state); err = ip_tun_set_opts(tb[LWTUNNEL_IP_OPTS], tun_info, extack); if (err < 0) { lwtstate_free(new_state); return err; } #ifdef CONFIG_DST_CACHE err = dst_cache_init(&tun_info->dst_cache, GFP_KERNEL); if (err) { lwtstate_free(new_state); return err; } #endif if (tb[LWTUNNEL_IP_ID]) tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]); if (tb[LWTUNNEL_IP_SRC]) tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]); if (tb[LWTUNNEL_IP_TTL]) tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); if (tb[LWTUNNEL_IP_TOS]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); if (tb[LWTUNNEL_IP_FLAGS]) tun_info->key.tun_flags |= (nla_get_be16(tb[LWTUNNEL_IP_FLAGS]) & ~TUNNEL_OPTIONS_PRESENT); tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options_len = opt_len; *ts = new_state; return 0; } static void ip_tun_destroy_state(struct lwtunnel_state *lwtstate) { #ifdef CONFIG_DST_CACHE struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); dst_cache_destroy(&tun_info->dst_cache); #endif } static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb, struct 
ip_tunnel_info *tun_info) { struct geneve_opt *opt; struct nlattr *nest; int offset = 0; nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE); if (!nest) return -ENOMEM; while (tun_info->options_len > offset) { opt = ip_tunnel_info_opts(tun_info) + offset; if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) || nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) || nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4, opt->opt_data)) { nla_nest_cancel(skb, nest); return -ENOMEM; } offset += sizeof(*opt) + opt->length * 4; } nla_nest_end(skb, nest); return 0; } static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { struct vxlan_metadata *md; struct nlattr *nest; nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_VXLAN); if (!nest) return -ENOMEM; md = ip_tunnel_info_opts(tun_info); if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) { nla_nest_cancel(skb, nest); return -ENOMEM; } nla_nest_end(skb, nest); return 0; } static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { struct erspan_metadata *md; struct nlattr *nest; nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_ERSPAN); if (!nest) return -ENOMEM; md = ip_tunnel_info_opts(tun_info); if (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version)) goto err; if (md->version == 1 && nla_put_be32(skb, LWTUNNEL_IP_OPT_ERSPAN_INDEX, md->u.index)) goto err; if (md->version == 2 && (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_DIR, md->u.md2.dir) || nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_HWID, get_hwid(&md->u.md2)))) goto err; nla_nest_end(skb, nest); return 0; err: nla_nest_cancel(skb, nest); return -ENOMEM; } static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type, struct ip_tunnel_info *tun_info) { struct nlattr *nest; int err = 0; if (!(tun_info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)) return 0; nest = nla_nest_start_noflag(skb, type); if (!nest) return -ENOMEM; if (tun_info->key.tun_flags & 
TUNNEL_GENEVE_OPT)
		err = ip_tun_fill_encap_opts_geneve(skb, tun_info);
	else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT)
		err = ip_tun_fill_encap_opts_vxlan(skb, tun_info);
	else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)
		err = ip_tun_fill_encap_opts_erspan(skb, tun_info);

	if (err) {
		nla_nest_cancel(skb, nest);
		return err;
	}

	nla_nest_end(skb, nest);

	return 0;
}

/*
 * Dump the IPv4 tunnel key of @lwtstate (id, dst, src, tos, ttl, flags) and
 * its options into @skb as LWTUNNEL_IP_* netlink attributes.
 *
 * Returns 0 on success. Any failing put, including a failure reported by
 * ip_tun_fill_encap_opts(), is reported to the caller as -ENOMEM.
 */
static int ip_tun_fill_encap_info(struct sk_buff *skb,
				  struct lwtunnel_state *lwtstate)
{
	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);

	if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id,
			 LWTUNNEL_IP_PAD) ||
	    nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
	    nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
	    nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
	    nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
	    nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) ||
	    ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info))
		return -ENOMEM;

	return 0;
}

/*
 * Netlink space needed for the options nest of @info.
 *
 * Returns 0 when TUNNEL_OPTIONS_PRESENT is not set in the key flags;
 * otherwise the size of the outer nest plus the per-type (GENEVE, VXLAN or
 * ERSPAN) inner nest and its attributes.
 */
static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
{
	int opt_len;

	if (!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
		return 0;

	opt_len = nla_total_size(0);		/* LWTUNNEL_IP_OPTS */
	if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
		struct geneve_opt *opt;
		int offset = 0;

		opt_len += nla_total_size(0);	/* LWTUNNEL_IP_OPTS_GENEVE */
		/* Walk the packed options; each one carries opt->length * 4 data bytes. */
		while (info->options_len > offset) {
			opt = ip_tunnel_info_opts(info) + offset;
			opt_len += nla_total_size(2)	/* OPT_GENEVE_CLASS */
				   + nla_total_size(1)	/* OPT_GENEVE_TYPE */
				   + nla_total_size(opt->length * 4);
							/* OPT_GENEVE_DATA */
			offset += sizeof(*opt) + opt->length * 4;
		}
	} else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
		opt_len += nla_total_size(0)	/* LWTUNNEL_IP_OPTS_VXLAN */
			   + nla_total_size(4);	/* OPT_VXLAN_GBP */
	} else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
		struct erspan_metadata *md = ip_tunnel_info_opts(info);

		opt_len += nla_total_size(0)	/* LWTUNNEL_IP_OPTS_ERSPAN */
			   + nla_total_size(1)	/* OPT_ERSPAN_VER */
			   + (md->version == 1 ?
			      nla_total_size(4)	/* OPT_ERSPAN_INDEX (v1) */
			      : nla_total_size(1) + nla_total_size(1));
						/* OPT_ERSPAN_DIR + HWID (v2) */
	}

	return opt_len;
}

/*
 * Upper bound on the netlink space ip_tun_fill_encap_info() needs for
 * @lwtstate: one term per attribute it emits, plus the options nest.
 */
static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	return nla_total_size_64bit(8)	/* LWTUNNEL_IP_ID */
		+ nla_total_size(4)	/* LWTUNNEL_IP_DST */
		+ nla_total_size(4)	/* LWTUNNEL_IP_SRC */
		+ nla_total_size(1)	/* LWTUNNEL_IP_TOS */
		+ nla_total_size(1)	/* LWTUNNEL_IP_TTL */
		+ nla_total_size(2)	/* LWTUNNEL_IP_FLAGS */
		+ ip_tun_opts_nlsize(lwt_tun_info(lwtstate));
					/* LWTUNNEL_IP_OPTS */
}

/*
 * Compare two tunnel encap states. Returns 0 when they are equal and non-zero
 * when the key bytes, mode, options length or options data differ.
 *
 * NOTE(review): the first memcmp() starts at the ip_tunnel_info pointer and
 * covers sizeof(key) bytes, so it presumably relies on 'key' being the first
 * member of struct ip_tunnel_info -- confirm against the struct definition.
 */
static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct ip_tunnel_info *info_a = lwt_tun_info(a);
	struct ip_tunnel_info *info_b = lwt_tun_info(b);

	return memcmp(info_a, info_b, sizeof(info_a->key)) ||
	       info_a->mode != info_b->mode ||
	       info_a->options_len != info_b->options_len ||
	       memcmp(ip_tunnel_info_opts(info_a),
		      ip_tunnel_info_opts(info_b), info_a->options_len);
}

/* lwtunnel callbacks registered for LWTUNNEL_ENCAP_IP in ip_tunnel_core_init(). */
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
	.build_state = ip_tun_build_state,
	.destroy_state = ip_tun_destroy_state,
	.fill_encap = ip_tun_fill_encap_info,
	.get_encap_size = ip_tun_encap_nlsize,
	.cmp_encap = ip_tun_cmp_encap,
	.owner = THIS_MODULE,
};

/* Netlink attribute policy used when parsing an LWTUNNEL_ENCAP_IP6 nest. */
static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
	[LWTUNNEL_IP6_UNSPEC]	= { .strict_start_type = LWTUNNEL_IP6_OPTS },
	[LWTUNNEL_IP6_ID]		= { .type = NLA_U64 },
	[LWTUNNEL_IP6_DST]		= { .len = sizeof(struct in6_addr) },
	[LWTUNNEL_IP6_SRC]		= { .len = sizeof(struct in6_addr) },
	[LWTUNNEL_IP6_HOPLIMIT]		= { .type = NLA_U8 },
	[LWTUNNEL_IP6_TC]		= { .type = NLA_U8 },
	[LWTUNNEL_IP6_FLAGS]		= { .type = NLA_U16 },
	[LWTUNNEL_IP6_OPTS]		= { .type = NLA_NESTED },
};

/*
 * Build an LWTUNNEL_ENCAP_IP6 state from the nested attribute @attr.
 *
 * Parses @attr against ip6_tun_policy, allocates a state with room for the
 * tunnel info plus opt_len option bytes, fills the key from the attributes
 * that are present, and stores the new state in *@ts.
 *
 * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
 * negative error returned by attribute parsing, ip_tun_get_optlen() or
 * ip_tun_set_opts(). On ip_tun_set_opts() failure the state is freed.
 */
static int ip6_tun_build_state(struct net *net, struct nlattr *attr,
			       unsigned int family, const void *cfg,
			       struct lwtunnel_state **ts,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
	struct lwtunnel_state *new_state;
	struct ip_tunnel_info *tun_info;
	int err, opt_len;

	err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr,
					  ip6_tun_policy, extack);
	if (err < 0)
		return err;

	opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP6_OPTS], extack);
	if (opt_len < 0)
		return opt_len;

	new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len);
	if (!new_state)
		return -ENOMEM;

	new_state->type = LWTUNNEL_ENCAP_IP6;

	tun_info = lwt_tun_info(new_state);

	err = ip_tun_set_opts(tb[LWTUNNEL_IP6_OPTS], tun_info, extack);
	if (err < 0) {
		lwtstate_free(new_state);
		return err;
	}

	if (tb[LWTUNNEL_IP6_ID])
		tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]);

	if (tb[LWTUNNEL_IP6_DST])
		tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]);

	if (tb[LWTUNNEL_IP6_SRC])
		tun_info->key.u.ipv6.src = nla_get_in6_addr(tb[LWTUNNEL_IP6_SRC]);

	if (tb[LWTUNNEL_IP6_HOPLIMIT])
		tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP6_HOPLIMIT]);

	if (tb[LWTUNNEL_IP6_TC])
		tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);

	/*
	 * User-supplied flags are OR-ed in with TUNNEL_OPTIONS_PRESENT masked
	 * out, so this attribute cannot set that bit.
	 * NOTE(review): presumably the bit is owned by ip_tun_set_opts() -- confirm.
	 */
	if (tb[LWTUNNEL_IP6_FLAGS])
		tun_info->key.tun_flags |=
				(nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]) &
				 ~TUNNEL_OPTIONS_PRESENT);

	tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
	tun_info->options_len = opt_len;

	*ts = new_state;

	return 0;
}

/*
 * IPv6 counterpart of ip_tun_fill_encap_info(): dump the tunnel key and
 * options as LWTUNNEL_IP6_* attributes. Returns 0 on success and -ENOMEM if
 * any attribute could not be added.
 */
static int ip6_tun_fill_encap_info(struct sk_buff *skb,
				   struct lwtunnel_state *lwtstate)
{
	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);

	if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id,
			 LWTUNNEL_IP6_PAD) ||
	    nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
	    nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
	    nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
	    nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
	    nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) ||
	    ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info))
		return -ENOMEM;

	return 0;
}

/* Netlink space needed by ip6_tun_fill_encap_info() for @lwtstate. */
static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	return nla_total_size_64bit(8)	/* LWTUNNEL_IP6_ID */
		+ nla_total_size(16)	/* LWTUNNEL_IP6_DST */
		+ nla_total_size(16)	/* LWTUNNEL_IP6_SRC */
		+ nla_total_size(1)	/* LWTUNNEL_IP6_HOPLIMIT */
		+ nla_total_size(1)	/* LWTUNNEL_IP6_TC */
		+ nla_total_size(2)	/* LWTUNNEL_IP6_FLAGS */
		+ ip_tun_opts_nlsize(lwt_tun_info(lwtstate));
					/* LWTUNNEL_IP6_OPTS */
}

/*
 * lwtunnel callbacks registered for LWTUNNEL_ENCAP_IP6. Shares cmp_encap with
 * the IPv4 ops and sets no destroy_state callback.
 */
static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
	.build_state = ip6_tun_build_state,
	.fill_encap = ip6_tun_fill_encap_info,
	.get_encap_size = ip6_tun_encap_nlsize,
	.cmp_encap = ip_tun_cmp_encap,
	.owner = THIS_MODULE,
};

/* Register the IPv4 and IPv6 tunnel lwtunnel encap ops at init time. */
void __init ip_tunnel_core_init(void)
{
	/* If you land here, make sure whether increasing ip_tunnel_info's
	 * options_len is a reasonable choice with its usage in front ends
	 * (f.e., it's part of flow keys, etc).
	 */
	BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255);

	lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
	lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
}

/* Static key counting users of tunnel metadata; starts out false. */
DEFINE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
EXPORT_SYMBOL(ip_tunnel_metadata_cnt);

/* Take one reference on the ip_tunnel_metadata_cnt static key. */
void ip_tunnel_need_metadata(void)
{
	static_branch_inc(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata);

/* Drop one reference on the ip_tunnel_metadata_cnt static key. */
void ip_tunnel_unneed_metadata(void)
{
	static_branch_dec(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);

/* Returns either the correct skb->protocol value, or 0 if invalid.
*/ __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) { if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && ip_hdr(skb)->version == 4) return htons(ETH_P_IP); if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && ipv6_hdr(skb)->version == 6) return htons(ETH_P_IPV6); return 0; } EXPORT_SYMBOL(ip_tunnel_parse_protocol); const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; EXPORT_SYMBOL(ip_tunnel_header_ops); /* This function returns true when ENCAP attributes are present in the nl msg */ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap) { bool ret = false; memset(encap, 0, sizeof(*encap)); if (!data) return ret; if (data[IFLA_IPTUN_ENCAP_TYPE]) { ret = true; encap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); } if (data[IFLA_IPTUN_ENCAP_FLAGS]) { ret = true; encap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); } if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; encap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; encap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; } EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms); void ip_tunnel_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { if (data[IFLA_IPTUN_LINK]) parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); if (parms->iph.ttl) parms->iph.frag_off = htons(IP_DF); } if (data[IFLA_IPTUN_TOS]) parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); if 
(data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); if (data[IFLA_IPTUN_PROTO]) parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); } EXPORT_SYMBOL_GPL(ip_tunnel_netlink_parms); |
4275 1261 743 646 2384 4029 4028 4145 4143 4030 2382 4031 4031 2382 2388 2387 2385 2387 1577 4027 2 4031 4025 2287 2343 3895 196 2386 2384 2383 2384 91 91 91 91 91 4292 50 51 11 1057 3620 4027 359 2747 28 349 350 2438 2437 2382 57 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 
969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 | // SPDX-License-Identifier: GPL-2.0 /* * KFENCE guarded object allocator and fault handling. * * Copyright (C) 2020, Google LLC. 
*/

/* Prefix every pr_*() message emitted from this file with "kfence: ". */
#define pr_fmt(fmt) "kfence: " fmt

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>

#include <asm/kfence.h>

#include "kfence.h"

/*
 * Disables KFENCE on the first warning assuming an irrecoverable error.
 * Evaluates to the truth value of @cond, so it can be used inside conditions.
 */
#define KFENCE_WARN_ON(cond)                                                   \
	({                                                                     \
		const bool __cond = WARN_ON(cond);                             \
		if (unlikely(__cond)) {                                        \
			WRITE_ONCE(kfence_enabled, false);                     \
			disabled_by_warn = true;                               \
		}                                                              \
		__cond;                                                        \
	})

/* === Data ================================================================= */

/* Whether KFENCE is currently enabled; accessed with READ_ONCE()/WRITE_ONCE(). */
static bool kfence_enabled __read_mostly;
/* Set by KFENCE_WARN_ON(); once set, re-enabling via the parameter is refused. */
static bool disabled_by_warn __read_mostly;

unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."

static int kfence_enable_late(void);

/*
 * Setter for the "sample_interval" parameter.
 *
 * Parses @val as an unsigned long and stores it through @kp->arg. Writing 0
 * while KFENCE is enabled disables it. Writing a non-zero value while KFENCE
 * is disabled and the system is past boot tries kfence_enable_late(), unless
 * KFENCE was disabled by a warning, in which case -EINVAL is returned.
 *
 * Returns 0 on success or a negative error code.
 */
static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
	unsigned long num;
	int ret = kstrtoul(val, 0, &num);

	if (ret < 0)
		return ret;

	/* Using 0 to indicate KFENCE is disabled. */
	if (!num && READ_ONCE(kfence_enabled)) {
		pr_info("disabled\n");
		WRITE_ONCE(kfence_enabled, false);
	}

	*((unsigned long *)kp->arg) = num;

	if (num && !READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
		return disabled_by_warn ? -EINVAL : kfence_enable_late();
	return 0;
}

/*
 * Getter for the "sample_interval" parameter: reports "0" while KFENCE is
 * disabled, regardless of the stored interval; otherwise the stored value.
 */
static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
{
	if (!READ_ONCE(kfence_enabled))
		return sprintf(buffer, "0\n");

	return param_get_ulong(buffer, kp);
}

static const struct kernel_param_ops sample_interval_param_ops = {
	.set = param_set_sample_interval,
	.get = param_get_sample_interval,
};
module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);

/* Pool usage% threshold when currently covered allocations are skipped. */
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);

/* If true, use a deferrable timer. */
static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
module_param_named(deferrable, kfence_deferrable, bool, 0444);

/* If true, check all canary bytes on panic. */
static bool kfence_check_on_panic __read_mostly;
module_param_named(check_on_panic, kfence_check_on_panic, bool, 0444);

/* The pool of pages used for guard pages and objects. */
char *__kfence_pool __read_mostly;
EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */

/*
 * Per-object metadata, with one-to-one mapping of object metadata to
 * backing pages (in __kfence_pool).
 */
static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
struct kfence_metadata *kfence_metadata __read_mostly;

/*
 * If kfence_metadata is not NULL, it may be accessed by kfence_shutdown_cache().
 * So introduce kfence_metadata_init to initialize metadata, and then make
 * kfence_metadata visible after initialization is successful. This prevents
 * potential UAF or access to uninitialized metadata.
 */
static struct kfence_metadata *kfence_metadata_init __read_mostly;

/* Freelist with available objects. */
static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist.
*/

/*
 * The static key to set up a KFENCE allocation; or if static keys are not used
 * to gate allocations, to avoid a load and compare if KFENCE is disabled.
 */
DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);

/* Gates the allocation, ensuring only one succeeds in a given period. */
atomic_t kfence_allocation_gate = ATOMIC_INIT(1);

/*
 * A Counting Bloom filter of allocation coverage: limits currently covered
 * allocations of the same source filling up the pool.
 *
 * Assuming a range of 15%-85% unique allocations in the pool at any point in
 * time, the below parameters provide a probability of 0.02-0.33 for false
 * positive hits respectively:
 *
 *	P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE))) ^ HNUM
 */
#define ALLOC_COVERED_HNUM	2
#define ALLOC_COVERED_ORDER	(const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
#define ALLOC_COVERED_SIZE	(1 << ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_HNEXT(h)	hash_32(h, ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_MASK	(ALLOC_COVERED_SIZE - 1)
static atomic_t alloc_covered[ALLOC_COVERED_SIZE];

/* Stack depth used to determine uniqueness of an allocation. */
#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)

/*
 * Randomness for stack hashes, making the same collisions across reboots and
 * different machines less likely.
 */
static u32 stack_hash_seed __ro_after_init;

/* Statistics counters for debugfs.
*/ enum kfence_counter_id { KFENCE_COUNTER_ALLOCATED, KFENCE_COUNTER_ALLOCS, KFENCE_COUNTER_FREES, KFENCE_COUNTER_ZOMBIES, KFENCE_COUNTER_BUGS, KFENCE_COUNTER_SKIP_INCOMPAT, KFENCE_COUNTER_SKIP_CAPACITY, KFENCE_COUNTER_SKIP_COVERED, KFENCE_COUNTER_COUNT, }; static atomic_long_t counters[KFENCE_COUNTER_COUNT]; static const char *const counter_names[] = { [KFENCE_COUNTER_ALLOCATED] = "currently allocated", [KFENCE_COUNTER_ALLOCS] = "total allocations", [KFENCE_COUNTER_FREES] = "total frees", [KFENCE_COUNTER_ZOMBIES] = "zombie allocations", [KFENCE_COUNTER_BUGS] = "total bugs", [KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)", [KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)", [KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)", }; static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT); /* === Internals ============================================================ */ static inline bool should_skip_covered(void) { unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100; return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh; } static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries) { num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH); num_entries = filter_irq_stacks(stack_entries, num_entries); return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed); } /* * Adds (or subtracts) count @val for allocation stack trace hash * @alloc_stack_hash from Counting Bloom filter. */ static void alloc_covered_add(u32 alloc_stack_hash, int val) { int i; for (i = 0; i < ALLOC_COVERED_HNUM; i++) { atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]); alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); } } /* * Returns true if the allocation stack trace hash @alloc_stack_hash is * currently contained (non-zero count) in Counting Bloom filter. 
*/
static bool alloc_covered_contains(u32 alloc_stack_hash)
{
	int i;

	/* Every one of the HNUM slots must be non-zero for a (possible) hit. */
	for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
		if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]))
			return false;
		alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
	}

	return true;
}

/*
 * Call kfence_protect_page(..., true) on the page containing @addr.
 * Returns true on success; on failure KFENCE_WARN_ON() fires and false is
 * returned.
 */
static bool kfence_protect(unsigned long addr)
{
	return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
}

/*
 * Call kfence_protect_page(..., false) on the page containing @addr.
 * Returns true on success; on failure KFENCE_WARN_ON() fires and false is
 * returned.
 */
static bool kfence_unprotect(unsigned long addr)
{
	return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
}

/*
 * Map a metadata entry to the address of its object page in __kfence_pool.
 * Entry i maps to byte offset (i + 1) * 2 * PAGE_SIZE.
 *
 * Returns the page address, or 0 (after a KFENCE warning) if @meta is not
 * inside kfence_metadata[] or its stored address is not on that page.
 */
static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
{
	unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
	unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];

	/* The checks do not affect performance; only called from slow-paths. */

	/* Only call with a pointer into kfence_metadata. */
	if (KFENCE_WARN_ON(meta < kfence_metadata ||
			   meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
		return 0;

	/*
	 * This metadata object only ever maps to 1 page; verify that the stored
	 * address is in the expected range.
	 */
	if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
		return 0;

	return pageaddr;
}

/*
 * Update the object's metadata state, including updating the alloc/free stacks
 * depending on the state transition.
 *
 * Must be called with meta->lock held. If @stack_entries is NULL the current
 * stack is captured here; otherwise @num_stack_entries entries are copied.
 * A transition to KFENCE_OBJECT_FREED records into the free track, any other
 * transition into the alloc track.
 */
static noinline void
metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
		      unsigned long *stack_entries, size_t num_stack_entries)
{
	struct kfence_track *track =
		next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;

	lockdep_assert_held(&meta->lock);

	if (stack_entries) {
		memcpy(track->stack_entries, stack_entries,
		       num_stack_entries * sizeof(stack_entries[0]));
	} else {
		/*
		 * Skip over 1 (this) functions; noinline ensures we do not
		 * accidentally skip over the caller by never inlining.
		 */
		num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
	}
	track->num_stack_entries = num_stack_entries;
	track->pid = task_pid_nr(current);
	track->cpu = raw_smp_processor_id();
	track->ts_nsec = local_clock(); /* Same source as printk timestamps. */

	/*
	 * Pairs with READ_ONCE() in
	 *	kfence_shutdown_cache(),
	 *	kfence_handle_page_fault().
	 */
	WRITE_ONCE(meta->state, next);
}

/*
 * Check canary byte at @addr.
 *
 * Returns true if the byte holds the expected pattern. Otherwise bumps the
 * bug counter, reports a KFENCE_ERROR_CORRUPTION under the owning object's
 * meta->lock, and returns false.
 */
static inline bool check_canary_byte(u8 *addr)
{
	struct kfence_metadata *meta;
	unsigned long flags;

	if (likely(*addr == KFENCE_CANARY_PATTERN_U8(addr)))
		return true;

	atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

	meta = addr_to_metadata((unsigned long)addr);
	raw_spin_lock_irqsave(&meta->lock, flags);
	kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION);
	raw_spin_unlock_irqrestore(&meta->lock, flags);

	return false;
}

/*
 * Write the canary pattern, 8 bytes at a time, into the object's page on both
 * sides of the object: from the page start up to meta->addr, and from the
 * 8-byte-aligned end of the object up to the end of the page.
 */
static inline void set_canary(const struct kfence_metadata *meta)
{
	const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
	unsigned long addr = pageaddr;

	/*
	 * The canary may be written to part of the object memory, but it does
	 * not affect it. The user should initialize the object before using it.
	 */
	for (; addr < meta->addr; addr += sizeof(u64))
		*((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;

	addr = ALIGN_DOWN(meta->addr + meta->size, sizeof(u64));
	for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64))
		*((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;
}

/*
 * Verify the canary bytes on both sides of the object described by @meta.
 * Corruption is reported through check_canary_byte(); at most one report is
 * generated per side.
 */
static inline void check_canary(const struct kfence_metadata *meta)
{
	const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
	unsigned long addr = pageaddr;

	/*
	 * We'll iterate over each canary byte per-side until a corrupted byte
	 * is found. However, we'll still iterate over the canary bytes to the
	 * right of the object even if there was an error in the canary bytes to
	 * the left of the object. Specifically, if check_canary_byte()
	 * generates an error, showing both sides might give more clues as to
	 * what the error is about when displaying which bytes were corrupted.
	 */

	/* Apply to left of object. */
	for (; meta->addr - addr >= sizeof(u64); addr += sizeof(u64)) {
		if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64))
			break;
	}

	/*
	 * If the canary is corrupted in a certain 64 bits, or the canary
	 * memory cannot be completely covered by multiple consecutive 64-bit
	 * words, it needs to be checked byte by byte.
	 */
	for (; addr < meta->addr; addr++) {
		if (unlikely(!check_canary_byte((u8 *)addr)))
			break;
	}

	/* Apply to right of object. */
	/* Byte-wise up to the next 8-byte boundary, then word-wise. */
	for (addr = meta->addr + meta->size; addr % sizeof(u64) != 0; addr++) {
		if (unlikely(!check_canary_byte((u8 *)addr)))
			return;
	}
	for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64)) {
		if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64)) {

			/* Mismatching word: rescan byte-wise to find the exact byte. */
			for (; addr - pageaddr < PAGE_SIZE; addr++) {
				if (!check_canary_byte((u8 *)addr))
					return;
			}
		}
	}
}

/*
 * Allocate one guarded object of @size bytes for @cache from the KFENCE pool.
 *
 * Takes the first entry off the freelist, places the object either at the
 * start of its page or (randomly) aligned towards the end of it, records the
 * allocation in the metadata, sets up the canaries and the slab fields, and
 * updates the statistics.
 *
 * Returns the object address, or NULL if the freelist is empty or the
 * entry's lock could not be taken.
 */
static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
				  unsigned long *stack_entries, size_t num_stack_entries,
				  u32 alloc_stack_hash)
{
	struct kfence_metadata *meta = NULL;
	unsigned long flags;
	struct slab *slab;
	void *addr;
	const bool random_right_allocate = get_random_u32_below(2);
	const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
				  !get_random_u32_below(CONFIG_KFENCE_STRESS_TEST_FAULTS);

	/* Try to obtain a free object. */
	raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
	if (!list_empty(&kfence_freelist)) {
		meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
		list_del_init(&meta->list);
	}
	raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
	if (!meta) {
		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
		return NULL;
	}

	if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
		/*
		 * This is extremely unlikely -- we are reporting on a
		 * use-after-free, which locked meta->lock, and the reporting
		 * code via printk calls kmalloc() which ends up in
		 * kfence_alloc() and tries to grab the same object that we're
		 * reporting on. While it has never been observed, lockdep does
		 * report that there is a possibility of deadlock. Fix it by
		 * using trylock and bailing out gracefully.
		 */
		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
		/* Put the object back on the freelist. */
		list_add_tail(&meta->list, &kfence_freelist);
		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

		return NULL;
	}

	meta->addr = metadata_to_pageaddr(meta);
	/* Unprotect if we're reusing this page. */
	if (meta->state == KFENCE_OBJECT_FREED)
		kfence_unprotect(meta->addr);

	/*
	 * Note: for allocations made before RNG initialization, will always
	 * return zero. We still benefit from enabling KFENCE as early as
	 * possible, even when the RNG is not yet available, as this will allow
	 * KFENCE to detect bugs due to earlier allocations. The only downside
	 * is that the out-of-bounds accesses detected are deterministic for
	 * such allocations.
	 */
	if (random_right_allocate) {
		/* Allocate on the "right" side, re-calculate address. */
		meta->addr += PAGE_SIZE - size;
		meta->addr = ALIGN_DOWN(meta->addr, cache->align);
	}

	addr = (void *)meta->addr;

	/* Update remaining metadata. */
	metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
	/* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
	WRITE_ONCE(meta->cache, cache);
	meta->size = size;
	meta->alloc_stack_hash = alloc_stack_hash;
	raw_spin_unlock_irqrestore(&meta->lock, flags);

	alloc_covered_add(alloc_stack_hash, 1);

	/* Set required slab fields. */
	slab = virt_to_slab((void *)meta->addr);
	slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
	slab->objects = 1;
#elif defined(CONFIG_SLAB)
	slab->s_mem = addr;
#endif

	/* Memory initialization. */
	set_canary(meta);

	/*
	 * We check slab_want_init_on_alloc() ourselves, rather than letting
	 * SL*B do the initialization, as otherwise we might overwrite KFENCE's
	 * redzone.
	 */
	if (unlikely(slab_want_init_on_alloc(gfp, cache)))
		memzero_explicit(addr, size);
	if (cache->ctor)
		cache->ctor(addr);

	if (random_fault)
		kfence_protect(meta->addr); /* Random "faults" by protecting the object. */

	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);

	return addr;
}

/*
 * Free the guarded object at @addr described by @meta.
 *
 * Reports an invalid free and bails out if the object is not currently
 * allocated or @addr is not its start address. Otherwise marks the object
 * freed, checks its canaries, protects its page, and either returns the entry
 * to the freelist or, when @zombie is true, leaves it off the freelist and
 * counts it as a zombie.
 */
static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
{
	struct kcsan_scoped_access assert_page_exclusive;
	unsigned long flags;
	bool init;

	raw_spin_lock_irqsave(&meta->lock, flags);

	if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
		/* Invalid or double-free, bail out. */
		atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
		kfence_report_error((unsigned long)addr, false, NULL, meta,
				    KFENCE_ERROR_INVALID_FREE);
		raw_spin_unlock_irqrestore(&meta->lock, flags);
		return;
	}

	/* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
	kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
				  KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
				  &assert_page_exclusive);

	if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
		kfence_unprotect((unsigned long)addr); /* To check canary bytes. */

	/* Restore page protection if there was an OOB access. */
	if (meta->unprotected_page) {
		memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
		kfence_protect(meta->unprotected_page);
		meta->unprotected_page = 0;
	}

	/* Mark the object as freed. */
	metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
	/* Sampled while the lock is still held; used after it is dropped. */
	init = slab_want_init_on_free(meta->cache);
	raw_spin_unlock_irqrestore(&meta->lock, flags);

	alloc_covered_add(meta->alloc_stack_hash, -1);

	/* Check canary bytes for memory corruption. */
	check_canary(meta);

	/*
	 * Clear memory if init-on-free is set. While we protect the page, the
	 * data is still there, and after a use-after-free is detected, we
	 * unprotect the page, so the data is still accessible.
	 */
	if (!zombie && unlikely(init))
		memzero_explicit(addr, meta->size);

	/* Protect to detect use-after-frees. */
	kfence_protect((unsigned long)addr);

	kcsan_end_scoped_access(&assert_page_exclusive);
	if (!zombie) {
		/* Add it to the tail of the freelist for reuse. */
		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
		KFENCE_WARN_ON(!list_empty(&meta->list));
		list_add_tail(&meta->list, &kfence_freelist);
		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

		atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
		atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
	} else {
		/* See kfence_shutdown_cache(). */
		atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
	}
}

/*
 * RCU callback: recover the metadata entry embedding @h and free its object
 * as a regular (non-zombie) free.
 */
static void rcu_guarded_free(struct rcu_head *h)
{
	struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);

	kfence_guarded_free((void *)meta->addr, meta, false);
}

/*
 * Initialization of the KFENCE pool after its allocation.
 * Returns 0 on success; otherwise returns the address up to
 * which partial initialization succeeded.
*/
static unsigned long kfence_init_pool(void)
{
	unsigned long addr;
	struct page *pages;
	int i;

	if (!arch_kfence_init_pool())
		return (unsigned long)__kfence_pool;

	addr = (unsigned long)__kfence_pool;
	pages = virt_to_page(__kfence_pool);

	/*
	 * Set up object pages: they must have PG_slab set, to avoid freeing
	 * these as real pages.
	 *
	 * We also want to avoid inserting kfence_free() in the kfree()
	 * fast-path in SLUB, and therefore need to ensure kfree() correctly
	 * enters __slab_free() slow-path.
	 */
	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
		struct slab *slab = page_slab(nth_page(pages, i));

		/* Only even, non-zero page indices are object pages. */
		if (!i || (i % 2))
			continue;

		__folio_set_slab(slab_folio(slab));
#ifdef CONFIG_MEMCG
		slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
				   MEMCG_DATA_OBJCGS;
#endif
	}

	/*
	 * Protect the first 2 pages. The first page is mostly unnecessary, and
	 * merely serves as an extended guard page. However, adding one
	 * additional page in the beginning gives us an even number of pages,
	 * which simplifies the mapping of address to metadata index.
	 */
	for (i = 0; i < 2; i++) {
		/* NOTE(review): unlike the loop below, this path returns without undoing the slab setup above -- confirm intended. */
		if (unlikely(!kfence_protect(addr)))
			return addr;

		addr += PAGE_SIZE;
	}

	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
		struct kfence_metadata *meta = &kfence_metadata_init[i];

		/* Initialize metadata. */
		INIT_LIST_HEAD(&meta->list);
		raw_spin_lock_init(&meta->lock);
		meta->state = KFENCE_OBJECT_UNUSED;
		meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
		list_add_tail(&meta->list, &kfence_freelist);

		/* Protect the right redzone. */
		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
			goto reset_slab;

		addr += 2 * PAGE_SIZE;
	}

	/*
	 * Make kfence_metadata visible only when initialization is successful.
	 * Otherwise, if the initialization fails and kfence_metadata is freed,
	 * it may cause UAF in kfence_shutdown_cache().
	 */
	smp_store_release(&kfence_metadata, kfence_metadata_init);
	return 0;

reset_slab:
	/* Undo the slab flag (and memcg data) set on the object pages above. */
	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
		struct slab *slab = page_slab(nth_page(pages, i));

		if (!i || (i % 2))
			continue;
#ifdef CONFIG_MEMCG
		slab->memcg_data = 0;
#endif
		__folio_clear_slab(slab_folio(slab));
	}

	return addr;
}

/*
 * Boot-time pool initialization.
 *
 * Returns true if the pool was initialized. On failure, releases the part of
 * the pool from the failing address onwards as well as the metadata array,
 * clears both pointers, and returns false. Also returns false if no pool has
 * been allocated.
 */
static bool __init kfence_init_pool_early(void)
{
	unsigned long addr;

	if (!__kfence_pool)
		return false;

	addr = kfence_init_pool();

	if (!addr) {
		/*
		 * The pool is live and will never be deallocated from this point on.
		 * Ignore the pool object from the kmemleak phys object tree, as it would
		 * otherwise overlap with allocations returned by kfence_alloc(), which
		 * are registered with kmemleak through the slab post-alloc hook.
		 */
		kmemleak_ignore_phys(__pa(__kfence_pool));
		return true;
	}

	/*
	 * Only release unprotected pages, and do not try to go back and change
	 * page attributes due to risk of failing to do so as well. If changing
	 * page attributes for some pages fails, it is very likely that it also
	 * fails for the first page, and therefore expect addr==__kfence_pool in
	 * most failure cases.
	 */
	memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
	__kfence_pool = NULL;

	memblock_free_late(__pa(kfence_metadata_init), KFENCE_METADATA_SIZE);
	kfence_metadata_init = NULL;

	return false;
}

/* === DebugFS Interface ==================================================== */

/* seq_file show callback: print the enabled flag followed by every counter. */
static int stats_show(struct seq_file *seq, void *v)
{
	int i;

	seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
	for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
		seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);

/*
 * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
 * start_object() and next_object() return the object index + 1, because NULL is used
 * to stop iteration.
*/ static void *start_object(struct seq_file *seq, loff_t *pos) { if (*pos < CONFIG_KFENCE_NUM_OBJECTS) return (void *)((long)*pos + 1); return NULL; } static void stop_object(struct seq_file *seq, void *v) { } static void *next_object(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; if (*pos < CONFIG_KFENCE_NUM_OBJECTS) return (void *)((long)*pos + 1); return NULL; } static int show_object(struct seq_file *seq, void *v) { struct kfence_metadata *meta = &kfence_metadata[(long)v - 1]; unsigned long flags; raw_spin_lock_irqsave(&meta->lock, flags); kfence_print_object(seq, meta); raw_spin_unlock_irqrestore(&meta->lock, flags); seq_puts(seq, "---------------------------------\n"); return 0; } static const struct seq_operations objects_sops = { .start = start_object, .next = next_object, .stop = stop_object, .show = show_object, }; DEFINE_SEQ_ATTRIBUTE(objects); static int kfence_debugfs_init(void) { struct dentry *kfence_dir; if (!READ_ONCE(kfence_enabled)) return 0; kfence_dir = debugfs_create_dir("kfence", NULL); debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops); debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops); return 0; } late_initcall(kfence_debugfs_init); /* === Panic Notifier ====================================================== */ static void kfence_check_all_canary(void) { int i; for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { struct kfence_metadata *meta = &kfence_metadata[i]; if (meta->state == KFENCE_OBJECT_ALLOCATED) check_canary(meta); } } static int kfence_check_canary_callback(struct notifier_block *nb, unsigned long reason, void *arg) { kfence_check_all_canary(); return NOTIFY_OK; } static struct notifier_block kfence_check_canary_notifier = { .notifier_call = kfence_check_canary_callback, }; /* === Allocation Gate Timer ================================================ */ static struct delayed_work kfence_timer; #ifdef CONFIG_KFENCE_STATIC_KEYS /* Wait queue to wake up allocation-gate timer task. 
*/
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);

/* irq_work handler: wakes whoever sleeps on allocation_wait. */
static void wake_up_kfence_timer(struct irq_work *work)
{
	wake_up(&allocation_wait);
}
static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
#endif

/*
 * Set up delayed work, which will enable and disable the static key. We need to
 * use a work queue (rather than a simple timer), since enabling and disabling a
 * static key cannot be done from an interrupt.
 *
 * Note: Toggling a static branch currently causes IPIs, and here we'll end up
 * with a total of 2 IPIs to all CPUs. If this ends up a problem in future (with
 * more aggressive sampling intervals), we could get away with a variant that
 * avoids IPIs, at the cost of not immediately capturing allocations if the
 * instructions remain cached.
 */
static void toggle_allocation_gate(struct work_struct *work)
{
	/* Once disabled, the work is not re-queued, so the cycle stops here. */
	if (!READ_ONCE(kfence_enabled))
		return;

	/* Re-open the gate for the next sampled allocation. */
	atomic_set(&kfence_allocation_gate, 0);
#ifdef CONFIG_KFENCE_STATIC_KEYS
	/* Enable static key, and await allocation to happen. */
	static_branch_enable(&kfence_allocation_key);

	wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));

	/* Disable static key and reset timer. */
	static_branch_disable(&kfence_allocation_key);
#endif
	queue_delayed_work(system_unbound_wq, &kfence_timer,
			   msecs_to_jiffies(kfence_sample_interval));
}

/* === Public interface ===================================================== */

/*
 * Allocate the pool (unless already provided) and the metadata array from
 * memblock. Does nothing when kfence_sample_interval is 0. If the metadata
 * allocation fails, the pool is released again and __kfence_pool is cleared.
 */
void __init kfence_alloc_pool_and_metadata(void)
{
	if (!kfence_sample_interval)
		return;

	/*
	 * If the pool has already been initialized by arch, there is no need to
	 * re-allocate the memory pool.
	 */
	if (!__kfence_pool)
		__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

	if (!__kfence_pool) {
		pr_err("failed to allocate pool\n");
		return;
	}

	/* The memory allocated by memblock has been zeroed out. */
	kfence_metadata_init = memblock_alloc(KFENCE_METADATA_SIZE, PAGE_SIZE);
	if (!kfence_metadata_init) {
		pr_err("failed to allocate metadata\n");
		memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
		__kfence_pool = NULL;
	}
}

/*
 * Common enable path: initialises the gate work item, optionally registers
 * the panic notifier, sets kfence_enabled and queues the first gate toggle.
 */
static void kfence_init_enable(void)
{
	/* Without static-key sampling the key is simply left enabled. */
	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
		static_branch_enable(&kfence_allocation_key);

	if (kfence_deferrable)
		INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate);
	else
		INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate);

	if (kfence_check_on_panic)
		atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier);

	WRITE_ONCE(kfence_enabled, true);
	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);

	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
		CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
		(void *)(__kfence_pool + KFENCE_POOL_SIZE));
}

void __init kfence_init(void)
{
	/* The seed is drawn even when KFENCE ends up disabled below. */
	stack_hash_seed = get_random_u32();

	/* Setting kfence_sample_interval to 0 on boot disables KFENCE.
*/ if (!kfence_sample_interval) return; if (!kfence_init_pool_early()) { pr_err("%s failed\n", __func__); return; } kfence_init_enable(); } static int kfence_init_late(void) { const unsigned long nr_pages_pool = KFENCE_POOL_SIZE / PAGE_SIZE; const unsigned long nr_pages_meta = KFENCE_METADATA_SIZE / PAGE_SIZE; unsigned long addr = (unsigned long)__kfence_pool; unsigned long free_size = KFENCE_POOL_SIZE; int err = -ENOMEM; #ifdef CONFIG_CONTIG_ALLOC struct page *pages; pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node, NULL); if (!pages) return -ENOMEM; __kfence_pool = page_to_virt(pages); pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node, NULL); if (pages) kfence_metadata_init = page_to_virt(pages); #else if (nr_pages_pool > MAX_ORDER_NR_PAGES || nr_pages_meta > MAX_ORDER_NR_PAGES) { pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n"); return -EINVAL; } __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL); if (!__kfence_pool) return -ENOMEM; kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL); #endif if (!kfence_metadata_init) goto free_pool; memzero_explicit(kfence_metadata_init, KFENCE_METADATA_SIZE); addr = kfence_init_pool(); if (!addr) { kfence_init_enable(); kfence_debugfs_init(); return 0; } pr_err("%s failed\n", __func__); free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool); err = -EBUSY; #ifdef CONFIG_CONTIG_ALLOC free_contig_range(page_to_pfn(virt_to_page((void *)kfence_metadata_init)), nr_pages_meta); free_pool: free_contig_range(page_to_pfn(virt_to_page((void *)addr)), free_size / PAGE_SIZE); #else free_pages_exact((void *)kfence_metadata_init, KFENCE_METADATA_SIZE); free_pool: free_pages_exact((void *)addr, free_size); #endif kfence_metadata_init = NULL; __kfence_pool = NULL; return err; } static int kfence_enable_late(void) { if (!__kfence_pool) return kfence_init_late(); WRITE_ONCE(kfence_enabled, true); queue_delayed_work(system_unbound_wq, 
&kfence_timer, 0); pr_info("re-enabled\n"); return 0; } void kfence_shutdown_cache(struct kmem_cache *s) { unsigned long flags; struct kfence_metadata *meta; int i; /* Pairs with release in kfence_init_pool(). */ if (!smp_load_acquire(&kfence_metadata)) return; for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { bool in_use; meta = &kfence_metadata[i]; /* * If we observe some inconsistent cache and state pair where we * should have returned false here, cache destruction is racing * with either kmem_cache_alloc() or kmem_cache_free(). Taking * the lock will not help, as different critical section * serialization will have the same outcome. */ if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED) continue; raw_spin_lock_irqsave(&meta->lock, flags); in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED; raw_spin_unlock_irqrestore(&meta->lock, flags); if (in_use) { /* * This cache still has allocations, and we should not * release them back into the freelist so they can still * safely be used and retain the kernel's default * behaviour of keeping the allocations alive (leak the * cache); however, they effectively become "zombie * allocations" as the KFENCE objects are the only ones * still in use and the owning cache is being destroyed. * * We mark them freed, so that any subsequent use shows * more useful error messages that will include stack * traces of the user of the object, the original * allocation, and caller to shutdown_cache(). */ kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true); } } for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { meta = &kfence_metadata[i]; /* See above. 
*/ if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED) continue; raw_spin_lock_irqsave(&meta->lock, flags); if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED) meta->cache = NULL; raw_spin_unlock_irqrestore(&meta->lock, flags); } } void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { unsigned long stack_entries[KFENCE_STACK_DEPTH]; size_t num_stack_entries; u32 alloc_stack_hash; /* * Perform size check before switching kfence_allocation_gate, so that * we don't disable KFENCE without making an allocation. */ if (size > PAGE_SIZE) { atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; } /* * Skip allocations from non-default zones, including DMA. We cannot * guarantee that pages in the KFENCE pool will have the requested * properties (e.g. reside in DMAable memory). */ if ((flags & GFP_ZONEMASK) || (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; } /* * Skip allocations for this slab, if KFENCE has been disabled for * this slab. */ if (s->flags & SLAB_SKIP_KFENCE) return NULL; if (atomic_inc_return(&kfence_allocation_gate) > 1) return NULL; #ifdef CONFIG_KFENCE_STATIC_KEYS /* * waitqueue_active() is fully ordered after the update of * kfence_allocation_gate per atomic_inc_return(). */ if (waitqueue_active(&allocation_wait)) { /* * Calling wake_up() here may deadlock when allocations happen * from within timer code. Use an irq_work to defer it. */ irq_work_queue(&wake_up_kfence_timer_work); } #endif if (!READ_ONCE(kfence_enabled)) return NULL; num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0); /* * Do expensive check for coverage of allocation in slow-path after * allocation_gate has already become non-zero, even though it might * mean not making any allocation within a given sample interval. 
* * This ensures reasonable allocation coverage when the pool is almost * full, including avoiding long-lived allocations of the same source * filling up the pool (e.g. pagecache allocations). */ alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries); if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) { atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]); return NULL; } return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries, alloc_stack_hash); } size_t kfence_ksize(const void *addr) { const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); /* * Read locklessly -- if there is a race with __kfence_alloc(), this is * either a use-after-free or invalid access. */ return meta ? meta->size : 0; } void *kfence_object_start(const void *addr) { const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); /* * Read locklessly -- if there is a race with __kfence_alloc(), this is * either a use-after-free or invalid access. */ return meta ? (void *)meta->addr : NULL; } void __kfence_free(void *addr) { struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); #ifdef CONFIG_MEMCG KFENCE_WARN_ON(meta->objcg); #endif /* * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing * the object, as the object page may be recycled for other-typed * objects once it has been freed. meta->cache may be NULL if the cache * was destroyed. 
*/ if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU))) call_rcu(&meta->rcu_head, rcu_guarded_free); else kfence_guarded_free(addr, meta, false); } bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs) { const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE; struct kfence_metadata *to_report = NULL; enum kfence_error_type error_type; unsigned long flags; if (!is_kfence_address((void *)addr)) return false; if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */ return kfence_unprotect(addr); /* ... unprotect and proceed. */ atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); if (page_index % 2) { /* This is a redzone, report a buffer overflow. */ struct kfence_metadata *meta; int distance = 0; meta = addr_to_metadata(addr - PAGE_SIZE); if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) { to_report = meta; /* Data race ok; distance calculation approximate. */ distance = addr - data_race(meta->addr + meta->size); } meta = addr_to_metadata(addr + PAGE_SIZE); if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) { /* Data race ok; distance calculation approximate. */ if (!to_report || distance > data_race(meta->addr) - addr) to_report = meta; } if (!to_report) goto out; raw_spin_lock_irqsave(&to_report->lock, flags); to_report->unprotected_page = addr; error_type = KFENCE_ERROR_OOB; /* * If the object was freed before we took the look we can still * report this as an OOB -- the report will simply show the * stacktrace of the free as well. */ } else { to_report = addr_to_metadata(addr); if (!to_report) goto out; raw_spin_lock_irqsave(&to_report->lock, flags); error_type = KFENCE_ERROR_UAF; /* * We may race with __kfence_alloc(), and it is possible that a * freed object may be reallocated. We simply report this as a * use-after-free, with the stack trace showing the place where * the object was re-allocated. 
*/ } out: if (to_report) { kfence_report_error(addr, is_write, regs, to_report, error_type); raw_spin_unlock_irqrestore(&to_report->lock, flags); } else { /* This may be a UAF or OOB access, but we can't be sure. */ kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID); } return kfence_unprotect(addr); /* Unprotect and let access proceed. */ } |
12 2 2 8 10 5 1 5 3 5 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | // SPDX-License-Identifier: GPL-2.0+ /* * NILFS regular file handling primitives including fsync(). * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Amagai Yoshiji and Ryusuke Konishi. */ #include <linux/fs.h> #include <linux/mm.h> #include <linux/writeback.h> #include "nilfs.h" #include "segment.h" int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { /* * Called from fsync() system call * This is the only entry point that can catch write and synch * timing for both data blocks and intermediate blocks. * * This function should be implemented when the writeback function * will be implemented. 
*/
	struct the_nilfs *nilfs;
	struct inode *inode = file->f_mapping->host;
	int err = 0;

	/* A segment is only constructed when the inode is dirty. */
	if (nilfs_inode_dirty(inode)) {
		if (datasync)
			err = nilfs_construct_dsync_segment(inode->i_sb, inode,
							    start, end);
		else
			err = nilfs_construct_segment(inode->i_sb);
	}

	nilfs = inode->i_sb->s_fs_info;
	/* The device flush is skipped if segment construction failed. */
	if (!err)
		err = nilfs_flush_device(nilfs);

	return err;
}

/*
 * page_mkwrite handler: called with the faulting page in vmf->page.
 * Returns VM_FAULT_SIGBUS straight away when the filesystem is near full;
 * otherwise the result of vmf_fs_error() applied to the internal error code.
 */
static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct nilfs_transaction_info ti;
	int ret = 0;

	if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
		return VM_FAULT_SIGBUS; /* -ENOSPC */

	sb_start_pagefault(inode->i_sb);
	lock_page(page);
	/* Bail out if the page no longer belongs to this file, lies beyond EOF or is stale. */
	if (page->mapping != inode->i_mapping ||
	    page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
		unlock_page(page);
		ret = -EFAULT;	/* make the VM retry the fault */
		goto out;
	}

	/*
	 * check to see if the page is mapped already (no holes)
	 */
	if (PageMappedToDisk(page))
		goto mapped;

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int fully_mapped = 1;

		/* Walk the circular buffer list; stop at the first unmapped buffer. */
		bh = head = page_buffers(page);
		do {
			if (!buffer_mapped(bh)) {
				fully_mapped = 0;
				break;
			}
		} while (bh = bh->b_this_page, bh != head);

		if (fully_mapped) {
			SetPageMappedToDisk(page);
			goto mapped;
		}
	}
	/* The page lock is dropped before the transaction is started. */
	unlock_page(page);

	/*
	 * fill hole blocks
	 */
	ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
	/* never returns -ENOMEM, but may return -ENOSPC */
	if (unlikely(ret))
		goto out;

	file_update_time(vma->vm_file);
	ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
	if (ret) {
		nilfs_transaction_abort(inode->i_sb);
		goto out;
	}
	/* Dirty count passed is the number of blocks per page. */
	nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
	nilfs_transaction_commit(inode->i_sb);

 mapped:
	wait_for_stable_page(page);
 out:
	sb_end_pagefault(inode->i_sb);
	return vmf_fs_error(ret);
}

/* Generic filemap handlers for read faults; only write faults are nilfs-specific. */
static const struct vm_operations_struct nilfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= nilfs_page_mkwrite,
};

/* mmap handler: records the file access and installs nilfs_file_vm_ops. Always returns 0. */
static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &nilfs_file_vm_ops;
	return 0;
}

/*
 * We have mostly NULL's here: the current defaults are ok for
 * the nilfs filesystem.
 */
const struct file_operations nilfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.unlocked_ioctl	= nilfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= nilfs_compat_ioctl,
#endif	/* CONFIG_COMPAT */
	.mmap		= nilfs_file_mmap,
	.open		= generic_file_open,
	/* .release	= nilfs_release_file, */
	.fsync		= nilfs_sync_file,
	.splice_read	= filemap_splice_read,
	.splice_write	= iter_file_splice_write,
};

const struct inode_operations nilfs_file_inode_operations = {
	.setattr	= nilfs_setattr,
	.permission     = nilfs_permission,
	.fiemap		= nilfs_fiemap,
	.fileattr_get	= nilfs_fileattr_get,
	.fileattr_set	= nilfs_fileattr_set,
};

/* end of file */ |
21 21 1 19 1 12 1 1 5 9 1 2 1 4 2 2 1 3 2 1 3 3 3 2 1 3 2 1 3 21 21 18 18 18 18 21 21 21 3 3 1 3 3 3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_flow.c Generic flow classifier * * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/list.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/pkt_cls.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <linux/slab.h> #include <linux/module.h> #include <net/inet_sock.h> #include <net/pkt_cls.h> #include <net/ip.h> #include <net/route.h> #include <net/flow_dissector.h> #include <net/tc_wrapper.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif struct flow_head { struct list_head filters; struct rcu_head rcu; }; struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; struct timer_list perturb_timer; u32 perturb_period; u32 handle; u32 nkeys; u32 keymask; u32 mode; u32 mask; u32 xor; u32 rshift; u32 addend; u32 divisor; u32 baseclass; u32 hashrnd; struct 
rcu_work rwork; }; static inline u32 addr_fold(void *addr) { unsigned long a = (unsigned long)addr; return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); } static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { __be32 src = flow_get_u32_src(flow); if (src) return ntohl(src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { __be32 dst = flow_get_u32_dst(flow); if (dst) return ntohl(dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) { return skb->skb_iif; } static u32 flow_get_priority(const struct sk_buff *skb) { return skb->priority; } static u32 flow_get_mark(const struct sk_buff *skb) { return skb->mark; } static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb_nfct(skb)); #else return 0; #endif } #if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ if (ct == NULL) \ goto fallback; \ ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ }) #else #define CTTUPLE(skb, member) \ ({ \ goto fallback; \ 0; \ }) #endif static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): return 
ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: return flow_get_src(skb, flow); } static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: return flow_get_dst(skb, flow); } static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: return flow_get_proto_dst(skb, flow); } static u32 flow_get_rtclassid(const struct sk_buff *skb) { #ifdef CONFIG_IP_ROUTE_CLASSID if (skb_dst(skb)) return skb_dst(skb)->tclassid; #endif return 0; } static u32 flow_get_skuid(const struct sk_buff *skb) { struct sock *sk = skb_to_full_sk(skb); if (sk && sk->sk_socket && sk->sk_socket->file) { kuid_t skuid = sk->sk_socket->file->f_cred->fsuid; return from_kuid(&init_user_ns, skuid); } return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { struct sock *sk = skb_to_full_sk(skb); if (sk && sk->sk_socket && sk->sk_socket->file) { kgid_t skgid = sk->sk_socket->file->f_cred->fsgid; return from_kgid(&init_user_ns, skgid); } return 0; } static u32 flow_get_vlan_tag(const struct sk_buff *skb) { u16 tag; if (vlan_get_tag(skb, &tag) < 0) return 0; return tag & VLAN_VID_MASK; } static u32 flow_get_rxhash(struct sk_buff *skb) { return skb_get_hash(skb); } static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) { switch (key) { case FLOW_KEY_SRC: return flow_get_src(skb, flow); case FLOW_KEY_DST: return flow_get_dst(skb, flow); case FLOW_KEY_PROTO: return flow_get_proto(skb, flow); case FLOW_KEY_PROTO_SRC: return flow_get_proto_src(skb, flow); case FLOW_KEY_PROTO_DST: return flow_get_proto_dst(skb, flow); 
case FLOW_KEY_IIF: return flow_get_iif(skb); case FLOW_KEY_PRIORITY: return flow_get_priority(skb); case FLOW_KEY_MARK: return flow_get_mark(skb); case FLOW_KEY_NFCT: return flow_get_nfct(skb); case FLOW_KEY_NFCT_SRC: return flow_get_nfct_src(skb, flow); case FLOW_KEY_NFCT_DST: return flow_get_nfct_dst(skb, flow); case FLOW_KEY_NFCT_PROTO_SRC: return flow_get_nfct_proto_src(skb, flow); case FLOW_KEY_NFCT_PROTO_DST: return flow_get_nfct_proto_dst(skb, flow); case FLOW_KEY_RTCLASSID: return flow_get_rtclassid(skb); case FLOW_KEY_SKUID: return flow_get_skuid(skb); case FLOW_KEY_SKGID: return flow_get_skgid(skb); case FLOW_KEY_VLAN_TAG: return flow_get_vlan_tag(skb); case FLOW_KEY_RXHASH: return flow_get_rxhash(skb); default: WARN_ON(1); return 0; } } #define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \ (1 << FLOW_KEY_DST) | \ (1 << FLOW_KEY_PROTO) | \ (1 << FLOW_KEY_PROTO_SRC) | \ (1 << FLOW_KEY_PROTO_DST) | \ (1 << FLOW_KEY_NFCT_SRC) | \ (1 << FLOW_KEY_NFCT_DST) | \ (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ (1 << FLOW_KEY_NFCT_PROTO_DST)) TC_INDIRECT_SCOPE int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct flow_head *head = rcu_dereference_bh(tp->root); struct flow_filter *f; u32 keymask; u32 classid; unsigned int n, key; int r; list_for_each_entry_rcu(f, &head->filters, list) { u32 keys[FLOW_KEY_MAX + 1]; struct flow_keys flow_keys; if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) skb_flow_dissect_flow_keys(skb, &flow_keys, 0); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; keymask &= ~(1 << key); keys[n] = flow_key_get(skb, key, &flow_keys); } if (f->mode == FLOW_MODE_HASH) classid = jhash2(keys, f->nkeys, f->hashrnd); else { classid = keys[0]; classid = (classid & f->mask) ^ f->xor; classid = (classid >> f->rshift) + f->addend; } if (f->divisor) classid %= f->divisor; res->class = 0; res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid); r = 
tcf_exts_exec(skb, &f->exts, res); if (r < 0) continue; return r; } return -1; } static void flow_perturbation(struct timer_list *t) { struct flow_filter *f = from_timer(f, t, perturb_timer); get_random_bytes(&f->hashrnd, 4); if (f->perturb_period) mod_timer(&f->perturb_timer, jiffies + f->perturb_period); } static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, [TCA_FLOW_MASK] = { .type = NLA_U32 }, [TCA_FLOW_XOR] = { .type = NLA_U32 }, [TCA_FLOW_DIVISOR] = { .type = NLA_U32 }, [TCA_FLOW_ACT] = { .type = NLA_NESTED }, [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; static void __flow_destroy_filter(struct flow_filter *f) { timer_shutdown_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); kfree(f); } static void flow_destroy_filter_work(struct work_struct *work) { struct flow_filter *f = container_of(to_rcu_work(work), struct flow_filter, rwork); rtnl_lock(); __flow_destroy_filter(f); rtnl_unlock(); } static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; unsigned int nkeys = 0; unsigned int perturb_period = 0; u32 baseclass = 0; u32 keymask = 0; u32 mode; int err; if (opt == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); if (err < 0) return err; if (tb[TCA_FLOW_BASECLASS]) { baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]); if (TC_H_MIN(baseclass) == 0) 
return -EINVAL; } if (tb[TCA_FLOW_KEYS]) { keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); nkeys = hweight32(keymask); if (nkeys == 0) return -EINVAL; if (fls(keymask) - 1 > FLOW_KEY_MAX) return -EOPNOTSUPP; if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) return -EOPNOTSUPP; } fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); if (!fnew) return -ENOBUFS; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches); if (err < 0) goto err1; err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE); if (err < 0) goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags, extack); if (err < 0) goto err2; fold = *arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) goto err2; /* Copy fold into fnew */ fnew->tp = fold->tp; fnew->handle = fold->handle; fnew->nkeys = fold->nkeys; fnew->keymask = fold->keymask; fnew->mode = fold->mode; fnew->mask = fold->mask; fnew->xor = fold->xor; fnew->rshift = fold->rshift; fnew->addend = fold->addend; fnew->divisor = fold->divisor; fnew->baseclass = fold->baseclass; fnew->hashrnd = fold->hashrnd; mode = fold->mode; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) goto err2; if (!tb[TCA_FLOW_KEYS]) goto err2; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } if (TC_H_MAJ(baseclass) == 0) { struct Qdisc *q = tcf_block_q(tp->chain->block); baseclass = TC_H_MAKE(q->handle, baseclass); } if (TC_H_MIN(baseclass) == 0) baseclass = 
TC_H_MAKE(baseclass, 1); fnew->handle = handle; fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); } timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); tcf_block_netif_keep_dst(tp->chain->block); if (tb[TCA_FLOW_KEYS]) { fnew->keymask = keymask; fnew->nkeys = nkeys; } fnew->mode = mode; if (tb[TCA_FLOW_MASK]) fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]); if (tb[TCA_FLOW_XOR]) fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]); if (tb[TCA_FLOW_RSHIFT]) fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); if (tb[TCA_FLOW_ADDEND]) fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); if (tb[TCA_FLOW_DIVISOR]) fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); if (baseclass) fnew->baseclass = baseclass; fnew->perturb_period = perturb_period; if (perturb_period) mod_timer(&fnew->perturb_timer, jiffies + perturb_period); if (!*arg) list_add_tail_rcu(&fnew->list, &head->filters); else list_replace_rcu(&fold->list, &fnew->list); *arg = fnew; if (fold) { tcf_exts_get_net(&fold->exts); tcf_queue_work(&fold->rwork, flow_destroy_filter_work); } return 0; err2: tcf_exts_destroy(&fnew->exts); tcf_em_tree_destroy(&fnew->ematches); err1: kfree(fnew); return err; } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; list_del_rcu(&f->list); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, flow_destroy_filter_work); *last = list_empty(&head->filters); return 0; } static int flow_init(struct tcf_proto *tp) { struct flow_head *head; head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->filters); rcu_assign_pointer(tp->root, head); return 0; } static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, 
list) { list_del_rcu(&f->list); if (tcf_exts_get_net(&f->exts)) tcf_queue_work(&f->rwork, flow_destroy_filter_work); else __flow_destroy_filter(f); } kfree_rcu(head, rcu); } static void *flow_get(struct tcf_proto *tp, u32 handle) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) return f; return NULL; } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; if (f == NULL) return skb->len; t->tcm_handle = f->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) || nla_put_u32(skb, TCA_FLOW_MODE, f->mode)) goto nla_put_failure; if (f->mask != ~0 || f->xor != 0) { if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) || nla_put_u32(skb, TCA_FLOW_XOR, f->xor)) goto nla_put_failure; } if (f->rshift && nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift)) goto nla_put_failure; if (f->addend && nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend)) goto nla_put_failure; if (f->divisor && nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor)) goto nla_put_failure; if (f->baseclass && nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass)) goto nla_put_failure; if (f->perturb_period && nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ)) goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH if (f->ematches.hdr.nmatches && tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) goto nla_put_failure; #endif nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; 
list_for_each_entry(f, &head->filters, list) { if (!tc_cls_stats_dump(tp, arg, f)) break; } } static struct tcf_proto_ops cls_flow_ops __read_mostly = { .kind = "flow", .classify = flow_classify, .init = flow_init, .destroy = flow_destroy, .change = flow_change, .delete = flow_delete, .get = flow_get, .dump = flow_dump, .walk = flow_walk, .owner = THIS_MODULE, }; static int __init cls_flow_init(void) { return register_tcf_proto_ops(&cls_flow_ops); } static void __exit cls_flow_exit(void) { unregister_tcf_proto_ops(&cls_flow_ops); } module_init(cls_flow_init); module_exit(cls_flow_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("TC flow classifier"); |
1 1 1 1 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2019 HUAWEI, Inc. 
* https://www.huawei.com/ */ #include "compress.h" #include <linux/lz4.h> #ifndef LZ4_DISTANCE_MAX /* history window size */ #define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ #endif #define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1) #ifndef LZ4_DECOMPRESS_INPLACE_MARGIN #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) #endif struct z_erofs_lz4_decompress_ctx { struct z_erofs_decompress_req *rq; /* # of encoded, decoded pages */ unsigned int inpages, outpages; /* decoded block total length (used for in-place decompression) */ unsigned int oend; }; static int z_erofs_load_lz4_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { struct erofs_sb_info *sbi = EROFS_SB(sb); struct z_erofs_lz4_cfgs *lz4 = data; u16 distance; if (lz4) { if (size < sizeof(struct z_erofs_lz4_cfgs)) { erofs_err(sb, "invalid lz4 cfgs, size=%u", size); return -EINVAL; } distance = le16_to_cpu(lz4->max_distance); sbi->lz4.max_pclusterblks = le16_to_cpu(lz4->max_pclusterblks); if (!sbi->lz4.max_pclusterblks) { sbi->lz4.max_pclusterblks = 1; /* reserved case */ } else if (sbi->lz4.max_pclusterblks > erofs_blknr(sb, Z_EROFS_PCLUSTER_MAX_SIZE)) { erofs_err(sb, "too large lz4 pclusterblks %u", sbi->lz4.max_pclusterblks); return -EINVAL; } } else { distance = le16_to_cpu(dsb->u1.lz4_max_distance); sbi->lz4.max_pclusterblks = 1; } sbi->lz4.max_distance_pages = distance ? DIV_ROUND_UP(distance, PAGE_SIZE) + 1 : LZ4_MAX_DISTANCE_PAGES; return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks); } /* * Fill all gaps with bounce pages if it's a sparse page list. Also check if * all physical pages are consecutive, which can be seen for moderate CR. 
*/ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, struct page **pagepool) { struct z_erofs_decompress_req *rq = ctx->rq; struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, BITS_PER_LONG)] = { 0 }; unsigned int lz4_max_distance_pages = EROFS_SB(rq->sb)->lz4.max_distance_pages; void *kaddr = NULL; unsigned int i, j, top; top = 0; for (i = j = 0; i < ctx->outpages; ++i, ++j) { struct page *const page = rq->out[i]; struct page *victim; if (j >= lz4_max_distance_pages) j = 0; /* 'valid' bounced can only be tested after a complete round */ if (!rq->fillgaps && test_bit(j, bounced)) { DBG_BUGON(i < lz4_max_distance_pages); DBG_BUGON(top >= lz4_max_distance_pages); availables[top++] = rq->out[i - lz4_max_distance_pages]; } if (page) { __clear_bit(j, bounced); if (!PageHighMem(page)) { if (!i) { kaddr = page_address(page); continue; } if (kaddr && kaddr + PAGE_SIZE == page_address(page)) { kaddr += PAGE_SIZE; continue; } } kaddr = NULL; continue; } kaddr = NULL; __set_bit(j, bounced); if (top) { victim = availables[--top]; get_page(victim); } else { victim = erofs_allocpage(pagepool, GFP_KERNEL | __GFP_NOFAIL); set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); } rq->out[i] = victim; } return kaddr ? 
1 : 0; } static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, void *inpage, unsigned int *inputmargin, int *maptype, bool may_inplace) { struct z_erofs_decompress_req *rq = ctx->rq; unsigned int omargin, total, i, j; struct page **in; void *src, *tmp; if (rq->inplace_io) { omargin = PAGE_ALIGN(ctx->oend) - ctx->oend; if (rq->partial_decoding || !may_inplace || omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; for (i = 0; i < ctx->inpages; ++i) { DBG_BUGON(rq->in[i] == NULL); for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j) if (rq->out[j] == rq->in[i]) goto docopy; } } if (ctx->inpages <= 1) { *maptype = 0; return inpage; } kunmap_local(inpage); might_sleep(); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) return ERR_PTR(-ENOMEM); *maptype = 1; return src; docopy: /* Or copy compressed data which can be overlapped to per-CPU buffer */ in = rq->in; src = erofs_get_pcpubuf(ctx->inpages); if (!src) { DBG_BUGON(1); kunmap_local(inpage); return ERR_PTR(-EFAULT); } tmp = src; total = rq->inputsize; while (total) { unsigned int page_copycnt = min_t(unsigned int, total, PAGE_SIZE - *inputmargin); if (!inpage) inpage = kmap_local_page(*in); memcpy(tmp, inpage + *inputmargin, page_copycnt); kunmap_local(inpage); inpage = NULL; tmp += page_copycnt; total -= page_copycnt; ++in; *inputmargin = 0; } *maptype = 2; return src; } /* * Get the exact inputsize with zero_padding feature. * - For LZ4, it should work if zero_padding feature is on (5.3+); * - For MicroLZMA, it'd be enabled all the time. 
*/ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize) { const char *padend; padend = memchr_inv(padbuf, 0, padbufsize); if (!padend) return -EFSCORRUPTED; rq->inputsize -= padend - padbuf; rq->pageofs_in += padend - padbuf; return 0; } static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, u8 *out) { struct z_erofs_decompress_req *rq = ctx->rq; bool support_0padding = false, may_inplace = false; unsigned int inputmargin; u8 *headpage, *src; int ret, maptype; DBG_BUGON(*rq->in == NULL); headpage = kmap_local_page(*rq->in); /* LZ4 decompression inplace is only safe if zero_padding is enabled */ if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) { support_0padding = true; ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in, min_t(unsigned int, rq->inputsize, rq->sb->s_blocksize - rq->pageofs_in)); if (ret) { kunmap_local(headpage); return ret; } may_inplace = !((rq->pageofs_in + rq->inputsize) & (rq->sb->s_blocksize - 1)); } inputmargin = rq->pageofs_in; src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin, &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); /* legacy format could compress extra data in a pcluster. 
*/ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, rq->inputsize, rq->outputsize, rq->outputsize); else ret = LZ4_decompress_safe(src + inputmargin, out, rq->inputsize, rq->outputsize); if (ret != rq->outputsize) { erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]", ret, rq->inputsize, inputmargin, rq->outputsize); print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, 16, 1, src + inputmargin, rq->inputsize, true); print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, 16, 1, out, rq->outputsize, true); if (ret >= 0) memset(out + ret, 0, rq->outputsize - ret); ret = -EIO; } else { ret = 0; } if (maptype == 0) { kunmap_local(headpage); } else if (maptype == 1) { vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { erofs_put_pcpubuf(src); } else { DBG_BUGON(1); return -EFAULT; } return ret; } static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { struct z_erofs_lz4_decompress_ctx ctx; unsigned int dst_maptype; void *dst; int ret; ctx.rq = rq; ctx.oend = rq->pageofs_out + rq->outputsize; ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT; ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; /* one optimized fast path only for non bigpcluster cases yet */ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); dst = kmap_local_page(*rq->out); dst_maptype = 0; goto dstmap_out; } /* general decoding path which can be used for all cases */ ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool); if (ret < 0) { return ret; } else if (ret > 0) { dst = page_address(*rq->out); dst_maptype = 1; } else { dst = erofs_vm_map_ram(rq->out, ctx.outpages); if (!dst) return -ENOMEM; dst_maptype = 2; } dstmap_out: ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); if (!dst_maptype) kunmap_local(dst); else if (dst_maptype == 2) vm_unmap_ram(dst, ctx.outpages); return ret; } static int z_erofs_transform_plain(struct 
z_erofs_decompress_req *rq, struct page **pagepool) { const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; const unsigned int outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; const unsigned int righthalf = min_t(unsigned int, rq->outputsize, PAGE_SIZE - rq->pageofs_out); const unsigned int lefthalf = rq->outputsize - righthalf; const unsigned int interlaced_offset = rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out; u8 *src; if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { DBG_BUGON(1); return -EFSCORRUPTED; } if (rq->out[0] == *rq->in) { DBG_BUGON(rq->pageofs_out); return 0; } src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in; if (rq->out[0]) memcpy_to_page(rq->out[0], rq->pageofs_out, src + interlaced_offset, righthalf); if (outpages > inpages) { DBG_BUGON(!rq->out[outpages - 1]); if (rq->out[outpages - 1] != rq->in[inpages - 1]) { memcpy_to_page(rq->out[outpages - 1], 0, src + (interlaced_offset ? 0 : righthalf), lefthalf); } else if (!interlaced_offset) { memmove(src, src + righthalf, lefthalf); flush_dcache_page(rq->in[inpages - 1]); } } kunmap_local(src); return 0; } const struct z_erofs_decompressor erofs_decompressors[] = { [Z_EROFS_COMPRESSION_SHIFTED] = { .decompress = z_erofs_transform_plain, .name = "shifted" }, [Z_EROFS_COMPRESSION_INTERLACED] = { .decompress = z_erofs_transform_plain, .name = "interlaced" }, [Z_EROFS_COMPRESSION_LZ4] = { .config = z_erofs_load_lz4_config, .decompress = z_erofs_lz4_decompress, .name = "lz4" }, #ifdef CONFIG_EROFS_FS_ZIP_LZMA [Z_EROFS_COMPRESSION_LZMA] = { .config = z_erofs_load_lzma_config, .decompress = z_erofs_lzma_decompress, .name = "lzma" }, #endif #ifdef CONFIG_EROFS_FS_ZIP_DEFLATE [Z_EROFS_COMPRESSION_DEFLATE] = { .config = z_erofs_load_deflate_config, .decompress = z_erofs_deflate_decompress, .name = "deflate" }, #endif }; int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) { struct erofs_sb_info *sbi = 
EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; unsigned int algs, alg; erofs_off_t offset; int size, ret = 0; if (!erofs_sb_has_compr_cfgs(sbi)) { sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4; return z_erofs_load_lz4_config(sb, dsb, NULL, 0); } sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { erofs_err(sb, "unidentified algorithms %x, please upgrade kernel", sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); return -EOPNOTSUPP; } erofs_init_metabuf(&buf, sb); offset = EROFS_SUPER_OFFSET + sbi->sb_size; alg = 0; for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { void *data; if (!(algs & 1)) continue; data = erofs_read_metadata(sb, &buf, &offset, &size); if (IS_ERR(data)) { ret = PTR_ERR(data); break; } if (alg >= ARRAY_SIZE(erofs_decompressors) || !erofs_decompressors[alg].config) { erofs_err(sb, "algorithm %d isn't enabled on this kernel", alg); ret = -EOPNOTSUPP; } else { ret = erofs_decompressors[alg].config(sb, dsb, data, size); } kfree(data); if (ret) break; } erofs_put_metabuf(&buf); return ret; } |
24 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | /* SPDX-License-Identifier: GPL-2.0 */ /* Freezer declarations */ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED #include <linux/debug_locks.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> #include <linux/jump_label.h> #ifdef CONFIG_FREEZER DECLARE_STATIC_KEY_FALSE(freezer_active); extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ /* * Timeout for stopping processes */ extern unsigned int freeze_timeout_msecs; /* * Check if a process has been frozen */ extern bool frozen(struct task_struct *p); extern bool freezing_slow_path(struct task_struct *p); /* * Check if there is a request to freeze a process */ static inline bool freezing(struct task_struct *p) { if (static_branch_unlikely(&freezer_active)) return freezing_slow_path(p); return false; } /* Takes and releases task alloc lock using task_lock() */ extern void __thaw_task(struct task_struct *t); extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { might_sleep(); if (likely(!freezing(current))) return false; if (!(current->flags & PF_NOFREEZE)) debug_check_no_locks_held(); return __refrigerator(false); } extern bool freeze_task(struct task_struct *p); extern bool set_freezable(void); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); #else /* !CONFIG_CGROUP_FREEZER */ static inline bool cgroup_freezing(struct task_struct *task) { return false; } #endif /* !CONFIG_CGROUP_FREEZER */ #else /* !CONFIG_FREEZER */ static inline bool 
frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } static inline void __thaw_task(struct task_struct *t) {} static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} static inline bool try_to_freeze(void) { return false; } static inline void set_freezable(void) {} #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */ |
41 4 2 2 12 3 10 2 11 4 7 11 5 5 3 2 1 13 13 9 4 2 41 10 31 1 30 9 39 17 1 16 8 7 1 2 6 6 2 8 8 8 1 6 1 7 7 1 2 3 2 3 1 2 1 20 31 11 11 7 4 53 2 1 49 2 43 8 46 2 3 49 13 14 22 1 5 20 13 4 13 1 10 3 7 1 2 1 6 1 12 12 1 9 1 9 4 1 8 7 1 12 2 1 7 1 1 7 1 2 2 3 3 2 5 1 1 1 1 58 58 49 2 8 21 2 1 1 17 16 16 16 16 16 10 3 7 6 6 6 6 5 90 2 89 84 18 84 81 20 9 20 62 12 73 76 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
/* Per-request command data for the shutdown opcode. */
struct io_shutdown {
	struct file			*file;
	int				how;
};

/* Per-request command data for the accept opcode. */
struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

/* Per-request command data for the socket opcode. */
struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

/* Per-request command data for the connect opcode. */
struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

/* Per-request command data shared by the send/recv family of opcodes. */
struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	unsigned			len;
	/* bytes already transferred by earlier partial attempts */
	unsigned			done_io;
	unsigned			msg_flags;
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				addr_len;
	u16				buf_group;
	void __user			*addr;
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb 		*notif;
};

static inline bool io_check_multishot(struct io_kiocb *req,
				      unsigned int issue_flags)
{
	/*
	 * When ->locked_cq is set we only allow to post CQEs from the original
	 * task context. Usual request completions will be handled in other
	 * generic paths but multipoll may decide to post extra cqes.
	 */
	return !(issue_flags & IO_URING_F_IOWQ) ||
		!(issue_flags & IO_URING_F_MULTISHOT) ||
		!req->ctx->task_complete;
}

/*
 * Prepare a shutdown request: reject SQEs that set any of the fields this
 * opcode does not use, read the 'how' argument from sqe->len and force the
 * request to be executed asynchronously.
 */
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

/*
 * Issue a shutdown request.  Warns if called in non-blocking context,
 * returns -ENOTSOCK when the file is not a socket, otherwise stores the
 * result of __sys_shutdown_sock() as the request result.
 */
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/*
 * A short transfer is retried only when MSG_WAITALL was requested and the
 * socket is of type SOCK_STREAM or SOCK_SEQPACKET.
 */
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

/*
 * Try to hand the request's async msghdr back to the per-ring cache.
 * Nothing is done when the request has no async data or when the issue
 * flags contain IO_URING_F_UNLOCKED.
 */
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

/*
 * Obtain an async msghdr for the request and attach it as req->async_data.
 * The per-ring cache is tried first unless IO_URING_F_UNLOCKED is set, then
 * io_alloc_async_data().  free_iov is reset in both cases.  Returns NULL
 * when allocation fails.
 */
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct io_async_msghdr *hdr;

	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->netmsg_cache);
		if (entry) {
			hdr = container_of(entry, struct io_async_msghdr, cache);
			hdr->free_iov = NULL;
			req->flags |= REQ_F_ASYNC_DATA;
			req->async_data = hdr;
			return hdr;
		}
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

/*
 * Move an on-stack msghdr into async data so the request can be retried.
 *
 * Returns -EAGAIN in every case except allocation failure, where
 * kmsg->free_iov is freed and -ENOMEM is returned.  If the request already
 * has async data nothing is copied.
 */
static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	/* the copied msg_name still points into kmsg; retarget it */
	if (async_msg->msg.msg_name)
		async_msg->msg.msg_name = &async_msg->addr;

	if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
		return -EAGAIN;

	/* if were using fast_iov, set it to the new one */
	if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
		size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
		async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
	}

	return -EAGAIN;
}

/*
 * Copy the user msghdr of a sendmsg request into @iomsg, using the inline
 * fast_iov array as the initial iovec storage, and remember the user
 * control pointer in the request.  Returns the result of
 * sendmsg_copy_msghdr().
 */
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
					&iomsg->free_iov);
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

/*
 * Async preparation for send: when a destination address was supplied and
 * no async data exists yet, allocate async data and copy the address into
 * it.  Returns 0 when there is nothing to do, -ENOMEM on allocation
 * failure, otherwise the result of move_addr_to_kernel().
 */
int io_send_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

/*
 * Stash an already copied destination address in async data before a
 * retry.  Returns -EAGAIN, or -ENOMEM when async data cannot be allocated.
 */
static int io_setup_async_addr(struct io_kiocb *req,
			      struct sockaddr_storage *addr_storage,
			      unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;

	if (!sr->addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr_storage, sizeof(io->addr));
	return -EAGAIN;
}

/*
 * Async preparation for sendmsg: allocate async data and copy the user
 * msghdr into it.  REQ_F_NEED_CLEANUP is set only when the copy succeeded.
 */
int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

/* Cleanup hook: free the iovec array recorded in the async msghdr. */
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

/*
 * Prepare a send/sendmsg request from the SQE.  For IORING_OP_SEND the
 * optional destination address is taken from addr2/addr_len; for the other
 * opcode those fields must be zero.  Only IORING_RECVSEND_POLL_FIRST is
 * accepted in sqe->ioprio.  MSG_NOSIGNAL is always added to the message
 * flags.  Returns 0 or -EINVAL.
 */
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

/*
 * Issue a sendmsg request.
 *
 * Uses the msghdr from async data when present, otherwise copies it from
 * user space onto the stack.  On -EAGAIN in non-blocking context, or on a
 * short send that io_net_retry() allows, the state is moved to async data
 * and the value of io_setup_async_msg() is returned.  Otherwise the request
 * result is set (including bytes sent by earlier partial attempts) and
 * IOU_OK is returned.
 */
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
		/* restore the control pointer saved by io_sendmsg_copy_hdr() */
		kmsg->msg.msg_control_user = sr->msg_control;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	/* with MSG_WAITALL anything short of the full length counts as short */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* control data is dropped for the retry of the remainder */
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address;
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	if (sr->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = &io->addr;
		} else {
			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
		}
		msg.msg_namelen = sr->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, &__address,
issue_flags); sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter); if (unlikely(ret)) return ret; flags = sr->msg_flags; if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; if (flags & MSG_WAITALL) min_ret = iov_iter_count(&msg.msg_iter); flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; msg.msg_flags = flags; ret = sock_sendmsg(sock, &msg); if (ret < min_ret) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return io_setup_async_addr(req, &__address, issue_flags); if (ret > 0 && io_net_retry(sock, flags)) { sr->len -= ret; sr->buf += ret; sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_addr(req, &__address, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } if (ret >= 0) ret += sr->done_io; else if (sr->done_io) ret = sr->done_io; io_req_set_res(req, ret, 0); return IOU_OK; } static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) { int hdr; if (iomsg->namelen < 0) return true; if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), iomsg->namelen, &hdr)) return true; if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) return true; return false; } static int __io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct user_msghdr msg; int ret; if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) return -EFAULT; ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); if (ret) return ret; if (req->flags & REQ_F_BUFFER_SELECT) { if (msg.msg_iovlen == 0) { sr->len = iomsg->fast_iov[0].iov_len = 0; iomsg->fast_iov[0].iov_base = NULL; iomsg->free_iov = NULL; } else if (msg.msg_iovlen > 1) { return -EINVAL; } else { if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) return -EFAULT; sr->len = iomsg->fast_iov[0].iov_len; iomsg->free_iov = NULL; } if (req->flags & REQ_F_APOLL_MULTISHOT) { 
iomsg->namelen = msg.msg_namelen; iomsg->controllen = msg.msg_controllen; if (io_recvmsg_multishot_overflow(iomsg)) return -EOVERFLOW; } } else { iomsg->free_iov = iomsg->fast_iov; ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, false); if (ret > 0) ret = 0; } return ret; } #ifdef CONFIG_COMPAT static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct compat_msghdr msg; struct compat_iovec __user *uiov; int ret; if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) return -EFAULT; ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); if (ret) return ret; uiov = compat_ptr(msg.msg_iov); if (req->flags & REQ_F_BUFFER_SELECT) { compat_ssize_t clen; iomsg->free_iov = NULL; if (msg.msg_iovlen == 0) { sr->len = 0; } else if (msg.msg_iovlen > 1) { return -EINVAL; } else { if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(clen, &uiov->iov_len)) return -EFAULT; if (clen < 0) return -EINVAL; sr->len = clen; } if (req->flags & REQ_F_APOLL_MULTISHOT) { iomsg->namelen = msg.msg_namelen; iomsg->controllen = msg.msg_controllen; if (io_recvmsg_multishot_overflow(iomsg)) return -EOVERFLOW; } } else { iomsg->free_iov = iomsg->fast_iov; ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen, UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, true); if (ret < 0) return ret; } return 0; } #endif static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { iomsg->msg.msg_name = &iomsg->addr; iomsg->msg.msg_iter.nr_segs = 0; #ifdef CONFIG_COMPAT if (req->ctx->compat) return __io_compat_recvmsg_copy_hdr(req, iomsg); #endif return __io_recvmsg_copy_hdr(req, iomsg); } int io_recvmsg_prep_async(struct io_kiocb *req) { int ret; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; ret = io_recvmsg_copy_hdr(req, req->async_data); if (!ret) req->flags |= 
REQ_F_NEED_CLEANUP; return ret; } #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT) int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~(RECVMSG_FLAGS)) return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags); if (sr->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; if (sr->msg_flags & MSG_ERRQUEUE) req->flags |= REQ_F_CLEAR_POLLIN; if (sr->flags & IORING_RECV_MULTISHOT) { if (!(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; if (sr->msg_flags & MSG_WAITALL) return -EINVAL; if (req->opcode == IORING_OP_RECV && sr->len) return -EINVAL; req->flags |= REQ_F_APOLL_MULTISHOT; /* * Store the buffer group for this multishot receive separately, * as if we end up doing an io-wq based issue that selects a * buffer, it has to be committed immediately and that will * clear ->buf_list. This means we lose the link to the buffer * list, and the eventual buffer put on completion then cannot * restore it. */ sr->buf_group = req->buf_index; } #ifdef CONFIG_COMPAT if (req->ctx->compat) sr->msg_flags |= MSG_CMSG_COMPAT; #endif sr->done_io = 0; return 0; } static inline void io_recv_prep_retry(struct io_kiocb *req) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; sr->len = 0; /* get from the provided buffer */ req->buf_index = sr->buf_group; } /* * Finishes io_recv and io_recvmsg. * * Returns true if it is actually finished, or false if it should run * again (for multishot). 
*/ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, struct msghdr *msg, bool mshot_finished, unsigned issue_flags) { unsigned int cflags; cflags = io_put_kbuf(req, issue_flags); if (msg->msg_inq && msg->msg_inq != -1) cflags |= IORING_CQE_F_SOCK_NONEMPTY; if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { io_req_set_res(req, *ret, cflags); *ret = IOU_OK; return true; } if (!mshot_finished) { if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, *ret, cflags | IORING_CQE_F_MORE)) { io_recv_prep_retry(req); /* Known not-empty or unknown state, retry */ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) return false; if (issue_flags & IO_URING_F_MULTISHOT) *ret = IOU_ISSUE_SKIP_COMPLETE; else *ret = -EAGAIN; return true; } /* Otherwise stop multishot but use the current result. */ } io_req_set_res(req, *ret, cflags); if (issue_flags & IO_URING_F_MULTISHOT) *ret = IOU_STOP_MULTISHOT; else *ret = IOU_OK; return true; } static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg, struct io_sr_msg *sr, void __user **buf, size_t *len) { unsigned long ubuf = (unsigned long) *buf; unsigned long hdr; hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + kmsg->controllen; if (*len < hdr) return -EFAULT; if (kmsg->controllen) { unsigned long control = ubuf + hdr - kmsg->controllen; kmsg->msg.msg_control_user = (void __user *) control; kmsg->msg.msg_controllen = kmsg->controllen; } sr->buf = *buf; /* stash for later copy */ *buf = (void __user *) (ubuf + hdr); kmsg->payloadlen = *len = *len - hdr; return 0; } struct io_recvmsg_multishot_hdr { struct io_uring_recvmsg_out msg; struct sockaddr_storage addr; }; static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, struct io_async_msghdr *kmsg, unsigned int flags, bool *finished) { int err; int copy_len; struct io_recvmsg_multishot_hdr hdr; if (kmsg->namelen) kmsg->msg.msg_name = &hdr.addr; kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 
kmsg->msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &kmsg->msg, flags); *finished = err <= 0; if (err < 0) return err; hdr.msg = (struct io_uring_recvmsg_out) { .controllen = kmsg->controllen - kmsg->msg.msg_controllen, .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT }; hdr.msg.payloadlen = err; if (err > kmsg->payloadlen) err = kmsg->payloadlen; copy_len = sizeof(struct io_uring_recvmsg_out); if (kmsg->msg.msg_namelen > kmsg->namelen) copy_len += kmsg->namelen; else copy_len += kmsg->msg.msg_namelen; /* * "fromlen shall refer to the value before truncation.." * 1003.1g */ hdr.msg.namelen = kmsg->msg.msg_namelen; /* ensure that there is no gap between hdr and sockaddr_storage */ BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) != sizeof(struct io_uring_recvmsg_out)); if (copy_to_user(io->buf, &hdr, copy_len)) { *finished = true; return -EFAULT; } return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen + kmsg->controllen + err; } int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr iomsg, *kmsg; struct socket *sock; unsigned flags; int ret, min_ret = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; bool mshot_finished = true; sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; if (req_has_async_data(req)) { kmsg = req->async_data; } else { ret = io_recvmsg_copy_hdr(req, &iomsg); if (ret) return ret; kmsg = &iomsg; } if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) return io_setup_async_msg(req, kmsg, issue_flags); if (!io_check_multishot(req, issue_flags)) return io_setup_async_msg(req, kmsg, issue_flags); retry_multishot: if (io_do_buffer_select(req)) { void __user *buf; size_t len = sr->len; buf = io_buffer_select(req, &len, issue_flags); if (!buf) return -ENOBUFS; if (req->flags & REQ_F_APOLL_MULTISHOT) { ret = 
io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); if (ret) { io_kbuf_recycle(req, issue_flags); return ret; } } iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len); } flags = sr->msg_flags; if (force_nonblock) flags |= MSG_DONTWAIT; kmsg->msg.msg_get_inq = 1; kmsg->msg.msg_inq = -1; if (req->flags & REQ_F_APOLL_MULTISHOT) { ret = io_recvmsg_multishot(sock, sr, kmsg, flags, &mshot_finished); } else { /* disable partial retry for recvmsg with cmsg attached */ if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) min_ret = iov_iter_count(&kmsg->msg.msg_iter); ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags); } if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { ret = io_setup_async_msg(req, kmsg, issue_flags); if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } return ret; } if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_msg(req, kmsg, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { req_set_fail(req); } if (ret > 0) ret += sr->done_io; else if (sr->done_io) ret = sr->done_io; else io_kbuf_recycle(req, issue_flags); if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags)) goto retry_multishot; if (mshot_finished) { /* fast path, check for non-NULL to avoid function call */ if (kmsg->free_iov) kfree(kmsg->free_iov); io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; } return ret; } int io_recv(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct msghdr msg; struct socket *sock; unsigned flags; int ret, min_ret = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; size_t len = sr->len; if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) 
return -EAGAIN; if (!io_check_multishot(req, issue_flags)) return -EAGAIN; sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; msg.msg_get_inq = 1; msg.msg_controllen = 0; msg.msg_iocb = NULL; msg.msg_ubuf = NULL; retry_multishot: if (io_do_buffer_select(req)) { void __user *buf; buf = io_buffer_select(req, &len, issue_flags); if (!buf) return -ENOBUFS; sr->buf = buf; } ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter); if (unlikely(ret)) goto out_free; msg.msg_inq = -1; msg.msg_flags = 0; flags = sr->msg_flags; if (force_nonblock) flags |= MSG_DONTWAIT; if (flags & MSG_WAITALL) min_ret = iov_iter_count(&msg.msg_iter); ret = sock_recvmsg(sock, &msg, flags); if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { if (issue_flags & IO_URING_F_MULTISHOT) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } return -EAGAIN; } if (ret > 0 && io_net_retry(sock, flags)) { sr->len -= ret; sr->buf += ret; sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; return -EAGAIN; } if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { out_free: req_set_fail(req); } if (ret > 0) ret += sr->done_io; else if (sr->done_io) ret = sr->done_io; else io_kbuf_recycle(req, issue_flags); if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags)) goto retry_multishot; return ret; } void io_send_zc_cleanup(struct io_kiocb *req) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *io; if (req_has_async_data(req)) { io = req->async_data; /* might be ->fast_iov if *msg_copy_hdr failed */ if (io->free_iov != io->fast_iov) kfree(io->free_iov); } if (zc->notif) { io_notif_flush(zc->notif); zc->notif = NULL; } } #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) #define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | 
IORING_SEND_ZC_REPORT_USAGE) int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *notif; if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) return -EINVAL; /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ if (req->flags & REQ_F_CQE_SKIP) return -EINVAL; notif = zc->notif = io_alloc_notif(ctx); if (!notif) return -ENOMEM; notif->cqe.user_data = req->cqe.user_data; notif->cqe.res = 0; notif->cqe.flags = IORING_CQE_F_NOTIF; req->flags |= REQ_F_NEED_CLEANUP; zc->flags = READ_ONCE(sqe->ioprio); if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { if (zc->flags & ~IO_ZC_FLAGS_VALID) return -EINVAL; if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { io_notif_set_extended(notif); io_notif_to_data(notif)->zc_report = true; } } if (zc->flags & IORING_RECVSEND_FIXED_BUF) { unsigned idx = READ_ONCE(sqe->buf_index); if (unlikely(idx >= ctx->nr_user_bufs)) return -EFAULT; idx = array_index_nospec(idx, ctx->nr_user_bufs); req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(notif, ctx, 0); } if (req->opcode == IORING_OP_SEND_ZC) { if (READ_ONCE(sqe->__pad3[0])) return -EINVAL; zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); zc->addr_len = READ_ONCE(sqe->addr_len); } else { if (unlikely(sqe->addr2 || sqe->file_index)) return -EINVAL; if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF)) return -EINVAL; } zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); zc->len = READ_ONCE(sqe->len); zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; if (zc->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; zc->done_io = 0; #ifdef CONFIG_COMPAT if (req->ctx->compat) zc->msg_flags |= MSG_CMSG_COMPAT; #endif return 0; } static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length) { skb_zcopy_downgrade_managed(skb); return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); } 
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length) { struct skb_shared_info *shinfo = skb_shinfo(skb); int frag = shinfo->nr_frags; int ret = 0; struct bvec_iter bi; ssize_t copied = 0; unsigned long truesize = 0; if (!frag) shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; else if (unlikely(!skb_zcopy_managed(skb))) return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); bi.bi_size = min(from->count, length); bi.bi_bvec_done = from->iov_offset; bi.bi_idx = 0; while (bi.bi_size && frag < MAX_SKB_FRAGS) { struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi); copied += v.bv_len; truesize += PAGE_ALIGN(v.bv_len + v.bv_offset); __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page, v.bv_offset, v.bv_len); bvec_iter_advance_single(from->bvec, &bi, v.bv_len); } if (bi.bi_size) ret = -EMSGSIZE; shinfo->nr_frags = frag; from->bvec += bi.bi_idx; from->nr_segs -= bi.bi_idx; from->count -= copied; from->iov_offset = bi.bi_bvec_done; skb->data_len += copied; skb->len += copied; skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); if (!skb_zcopy_pure(skb)) sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } return ret; } int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) { struct sockaddr_storage __address; struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); struct msghdr msg; struct socket *sock; unsigned msg_flags; int ret, min_ret = 0; sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) return -EOPNOTSUPP; msg.msg_name = NULL; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_namelen = 0; if (zc->addr) { if (req_has_async_data(req)) { struct io_async_msghdr *io = req->async_data; msg.msg_name = &io->addr; } else { ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); if (unlikely(ret < 0)) return ret; msg.msg_name = (struct sockaddr 
*)&__address; } msg.msg_namelen = zc->addr_len; } if (!(req->flags & REQ_F_POLLED) && (zc->flags & IORING_RECVSEND_POLL_FIRST)) return io_setup_async_addr(req, &__address, issue_flags); if (zc->flags & IORING_RECVSEND_FIXED_BUF) { ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu, (u64)(uintptr_t)zc->buf, zc->len); if (unlikely(ret)) return ret; msg.sg_from_iter = io_sg_from_iter; } else { io_notif_set_extended(zc->notif); ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter); if (unlikely(ret)) return ret; ret = io_notif_account_mem(zc->notif, zc->len); if (unlikely(ret)) return ret; msg.sg_from_iter = io_sg_from_iter_iovec; } msg_flags = zc->msg_flags | MSG_ZEROCOPY; if (issue_flags & IO_URING_F_NONBLOCK) msg_flags |= MSG_DONTWAIT; if (msg_flags & MSG_WAITALL) min_ret = iov_iter_count(&msg.msg_iter); msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; msg.msg_flags = msg_flags; msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; ret = sock_sendmsg(sock, &msg); if (unlikely(ret < min_ret)) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return io_setup_async_addr(req, &__address, issue_flags); if (ret > 0 && io_net_retry(sock, msg.msg_flags)) { zc->len -= ret; zc->buf += ret; zc->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_addr(req, &__address, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } if (ret >= 0) ret += zc->done_io; else if (zc->done_io) ret = zc->done_io; /* * If we're in io-wq we can't rely on tw ordering guarantees, defer * flushing notif to io_send_zc_cleanup() */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(zc->notif); req->flags &= ~REQ_F_NEED_CLEANUP; } io_req_set_res(req, ret, IORING_CQE_F_MORE); return IOU_OK; } int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr iomsg, *kmsg; struct socket *sock; unsigned flags; int ret, min_ret = 0; 
io_notif_set_extended(sr->notif); sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) return -EOPNOTSUPP; if (req_has_async_data(req)) { kmsg = req->async_data; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) return ret; kmsg = &iomsg; } if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) return io_setup_async_msg(req, kmsg, issue_flags); flags = sr->msg_flags | MSG_ZEROCOPY; if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; if (flags & MSG_WAITALL) min_ret = iov_iter_count(&kmsg->msg.msg_iter); kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); if (unlikely(ret < min_ret)) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return io_setup_async_msg(req, kmsg, issue_flags); if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_msg(req, kmsg, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } /* fast path, check for non-NULL to avoid function call */ if (kmsg->free_iov) { kfree(kmsg->free_iov); kmsg->free_iov = NULL; } io_netmsg_recycle(req, issue_flags); if (ret >= 0) ret += sr->done_io; else if (sr->done_io) ret = sr->done_io; /* * If we're in io-wq we can't rely on tw ordering guarantees, defer * flushing notif to io_send_zc_cleanup() */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(sr->notif); req->flags &= ~REQ_F_NEED_CLEANUP; } io_req_set_res(req, ret, IORING_CQE_F_MORE); return IOU_OK; } void io_sendrecv_fail(struct io_kiocb *req) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); if (req->flags & REQ_F_PARTIAL_IO) req->cqe.res = sr->done_io; if ((req->flags & REQ_F_NEED_CLEANUP) && (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) req->cqe.flags |= IORING_CQE_F_MORE; } int io_accept_prep(struct 
io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); unsigned flags; if (sqe->len || sqe->buf_index) return -EINVAL; accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); accept->flags = READ_ONCE(sqe->accept_flags); accept->nofile = rlimit(RLIMIT_NOFILE); flags = READ_ONCE(sqe->ioprio); if (flags & ~IORING_ACCEPT_MULTISHOT) return -EINVAL; accept->file_slot = READ_ONCE(sqe->file_index); if (accept->file_slot) { if (accept->flags & SOCK_CLOEXEC) return -EINVAL; if (flags & IORING_ACCEPT_MULTISHOT && accept->file_slot != IORING_FILE_INDEX_ALLOC) return -EINVAL; } if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; if (flags & IORING_ACCEPT_MULTISHOT) req->flags |= REQ_F_APOLL_MULTISHOT; return 0; } int io_accept(struct io_kiocb *req, unsigned int issue_flags) { struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; unsigned int file_flags = force_nonblock ? 
O_NONBLOCK : 0; bool fixed = !!accept->file_slot; struct file *file; int ret, fd; if (!io_check_multishot(req, issue_flags)) return -EAGAIN; retry: if (!fixed) { fd = __get_unused_fd_flags(accept->flags, accept->nofile); if (unlikely(fd < 0)) return fd; } file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, accept->flags); if (IS_ERR(file)) { if (!fixed) put_unused_fd(fd); ret = PTR_ERR(file); if (ret == -EAGAIN && force_nonblock) { /* * if it's multishot and polled, we don't need to * return EAGAIN to arm the poll infra since it * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) ret = IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } else if (!fixed) { fd_install(fd, file); ret = fd; } else { ret = io_fixed_fd_install(req, issue_flags, file, accept->file_slot); } if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { io_req_set_res(req, ret, 0); return IOU_OK; } if (ret < 0) return ret; if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret, IORING_CQE_F_MORE)) goto retry; return -ECANCELED; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); if (sqe->addr || sqe->rw_flags || sqe->buf_index) return -EINVAL; sock->domain = READ_ONCE(sqe->fd); sock->type = READ_ONCE(sqe->off); sock->protocol = READ_ONCE(sqe->len); sock->file_slot = READ_ONCE(sqe->file_index); sock->nofile = rlimit(RLIMIT_NOFILE); sock->flags = sock->type & ~SOCK_TYPE_MASK; if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) return -EINVAL; if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; return 0; } int io_socket(struct io_kiocb *req, unsigned int issue_flags) { struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); bool fixed = !!sock->file_slot; struct file *file; int ret, fd; if (!fixed) { fd = __get_unused_fd_flags(sock->flags, sock->nofile); if (unlikely(fd < 0)) return fd; } file = 
__sys_socket_file(sock->domain, sock->type, sock->protocol); if (IS_ERR(file)) { if (!fixed) put_unused_fd(fd); ret = PTR_ERR(file); if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return -EAGAIN; if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); } else if (!fixed) { fd_install(fd, file); ret = fd; } else { ret = io_fixed_fd_install(req, issue_flags, file, sock->file_slot); } io_req_set_res(req, ret, 0); return IOU_OK; } int io_connect_prep_async(struct io_kiocb *req) { struct io_async_connect *io = req->async_data; struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); } int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); conn->addr_len = READ_ONCE(sqe->addr2); conn->in_progress = conn->seen_econnaborted = false; return 0; } int io_connect(struct io_kiocb *req, unsigned int issue_flags) { struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); struct io_async_connect __io, *io; unsigned file_flags; int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; if (req_has_async_data(req)) { io = req->async_data; } else { ret = move_addr_to_kernel(connect->addr, connect->addr_len, &__io.address); if (ret) goto out; io = &__io; } file_flags = force_nonblock ? 
O_NONBLOCK : 0; ret = __sys_connect_file(req->file, &io->address, connect->addr_len, file_flags); if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) && force_nonblock) { if (ret == -EINPROGRESS) { connect->in_progress = true; } else if (ret == -ECONNABORTED) { if (connect->seen_econnaborted) goto out; connect->seen_econnaborted = true; } if (req_has_async_data(req)) return -EAGAIN; if (io_alloc_async_data(req)) { ret = -ENOMEM; goto out; } memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } if (connect->in_progress) { /* * At least bluetooth will return -EBADFD on a re-connect * attempt, and it's (supposedly) also valid to get -EISCONN * which means the previous result is good. For both of these, * grab the sock_error() and use that for the completion. */ if (ret == -EBADFD || ret == -EISCONN) ret = sock_error(sock_from_file(req->file)->sk); } if (ret == -ERESTARTSYS) ret = -EINTR; out: if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } void io_netmsg_cache_free(struct io_cache_entry *entry) { kfree(container_of(entry, struct io_async_msghdr, cache)); } #endif |
490 489 2 526 8 491 125 530 12 495 577 528 534 578 1 488 1 575 14 494 37 1 17 4 17 17 1 1 4 125 125 9 9 129 129 129 1 1 1 2 1 1 130 124 8 124 130 124 124 8 8 493 495 18 3 495 60 17 495 62 494 496 48 48 539 540 2 505 114 541 1 497 10 19 12 11 526 9 528 3 15 458 17 487 2 12 12 486 540 498 2 43 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 
953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 
1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 
1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 
2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 
2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 | // SPDX-License-Identifier: GPL-2.0+ /* * dummy_hcd.c -- Dummy/Loopback USB host and device emulator driver. 
*
 * Maintainer: Alan Stern <stern@rowland.harvard.edu>
 *
 * Copyright (C) 2003 David Brownell
 * Copyright (C) 2003-2005 Alan Stern
 */

/*
 * This exposes a device side "USB gadget" API, driven by requests to a
 * Linux-USB host controller driver. USB traffic is simulated; there's
 * no need for USB hardware. Use this with two other drivers:
 *
 * - Gadget driver, responding to requests (device);
 * - Host-side device driver, as already familiar in Linux.
 *
 * Having this all in one kernel can help some stages of development,
 * bypassing some hardware (and driver) issues. UML could help too.
 *
 * Note: The emulation does not include isochronous transfers!
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/list.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/usb.h>
#include <linux/usb/gadget.h>
#include <linux/usb/hcd.h>
#include <linux/scatterlist.h>

#include <asm/byteorder.h>
#include <linux/io.h>
#include <asm/irq.h>
#include <asm/unaligned.h>

#define DRIVER_DESC	"USB Host+Gadget Emulator"
#define DRIVER_VERSION	"02 May 2005"

#define POWER_BUDGET	500	/* in mA; use 8 for low-power port testing */
#define POWER_BUDGET_3	900	/* in mA */

static const char	driver_name[] = "dummy_hcd";
static const char	driver_desc[] = "USB Host+Gadget Emulator";

static const char	gadget_name[] = "dummy_udc";

MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_AUTHOR("David Brownell");
MODULE_LICENSE("GPL");

/*
 * Load-time options.  Every field is registered below with
 * module_param_named() using S_IRUGO permissions.
 */
struct dummy_hcd_module_parameters {
	bool is_super_speed;	/* "true to simulate SuperSpeed connection" */
	bool is_high_speed;	/* "true to simulate HighSpeed connection" */
	unsigned int num;	/* "number of emulated controllers" */
};

/* Defaults: high speed on, SuperSpeed off, one emulated controller. */
static struct dummy_hcd_module_parameters mod_data = {
	.is_super_speed = false,
	.is_high_speed = true,
	.num = 1,
};
module_param_named(is_super_speed, mod_data.is_super_speed, bool, S_IRUGO);
MODULE_PARM_DESC(is_super_speed, "true to simulate SuperSpeed connection");
module_param_named(is_high_speed, mod_data.is_high_speed, bool, S_IRUGO);
MODULE_PARM_DESC(is_high_speed, "true to simulate HighSpeed connection");
module_param_named(num, mod_data.num, uint, S_IRUGO);
MODULE_PARM_DESC(num, "number of emulated controllers");
/*-------------------------------------------------------------------------*/

/* gadget side driver data structures */

/* Per-endpoint state of the emulated device controller. */
struct dummy_ep {
	/* pending requests, linked through dummy_request.queue */
	struct list_head		queue;
	unsigned long			last_io;	/* jiffies timestamp */
	/* owning gadget; ep_to_dummy() maps it back to struct dummy */
	struct usb_gadget		*gadget;
	/* NULL while disabled: set by dummy_enable(), cleared by dummy_disable() */
	const struct usb_endpoint_descriptor *desc;
	/* embedded gadget-API endpoint; usb_ep_to_dummy_ep() relies on it */
	struct usb_ep			ep;
	/* halted/wedged are set and cleared by dummy_set_halt_and_wedge() */
	unsigned			halted:1;
	unsigned			wedged:1;
	unsigned			already_seen:1;
	unsigned			setup_stage:1;
	/* set by dummy_enable() when the endpoint reports SuperSpeed streams */
	unsigned			stream_en:1;
};

/* A gadget request together with the list linkage used to queue it. */
struct dummy_request {
	struct list_head		queue;		/* ep's requests */
	struct usb_request		req;
};

/* Recover the dummy_ep that embeds @_ep. */
static inline struct dummy_ep *usb_ep_to_dummy_ep(struct usb_ep *_ep)
{
	return container_of(_ep, struct dummy_ep, ep);
}

/* Recover the dummy_request that embeds @_req. */
static inline struct dummy_request *usb_request_to_dummy_request
		(struct usb_request *_req)
{
	return container_of(_req, struct dummy_request, req);
}

/*-------------------------------------------------------------------------*/

/*
 * Every device has ep0 for control requests, plus up to 30 more endpoints,
 * in one of two types:
 *
 * - Configurable: direction (in/out), type (bulk, iso, etc), and endpoint
 * number can be changed. Names like "ep-a" are used for this type.
 *
 * - Fixed Function: in other cases. some characteristics may be mutable;
 * that'd be hardware-specific. Names like "ep12out-bulk" are used.
 *
 * Gadget drivers are responsible for not setting up conflicting endpoint
 * configurations, illegal or unsupported packet lengths, and so on.
*/

/* Name of the control endpoint; other code compares name pointers against it. */
static const char ep0name[] = "ep0";

/*
 * Table of emulated endpoints: a name plus the capabilities (transfer types
 * and directions) advertised for it.  Its length defines DUMMY_ENDPOINTS.
 */
static const struct {
	const char *name;
	const struct usb_ep_caps caps;
} ep_info[] = {
#define EP_INFO(_name, _caps) \
	{ \
		.name = _name, \
		.caps = _caps, \
	}

/* we don't provide isochronous endpoints since we don't support them */
#define TYPE_BULK_OR_INT	(USB_EP_CAPS_TYPE_BULK | USB_EP_CAPS_TYPE_INT)

	/* everyone has ep0 */
	EP_INFO(ep0name,
		USB_EP_CAPS(USB_EP_CAPS_TYPE_CONTROL, USB_EP_CAPS_DIR_ALL)),
	/* act like a pxa250: fifteen fixed function endpoints */
	EP_INFO("ep1in-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep2out-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)),
/*
	EP_INFO("ep3in-iso",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep4out-iso",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)),
*/
	EP_INFO("ep5in-int",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep6in-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep7out-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)),
/*
	EP_INFO("ep8in-iso",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep9out-iso",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)),
*/
	EP_INFO("ep10in-int",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep11in-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep12out-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)),
/*
	EP_INFO("ep13in-iso",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep14out-iso",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)),
*/
	EP_INFO("ep15in-int",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)),

	/* or like sa1100: two fixed function endpoints */
	EP_INFO("ep1out-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep2in-bulk",
		USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)),

	/* and now some generic EPs so we have enough in multi config */
	EP_INFO("ep-aout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-bin",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep-cout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-dout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-ein",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep-fout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-gin",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep-hout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-iout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-jin",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep-kout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),
	EP_INFO("ep-lin",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)),
	EP_INFO("ep-mout",
		USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)),

#undef EP_INFO
};

/* Number of emulated endpoints, ep0 included (one per ep_info[] entry). */
#define DUMMY_ENDPOINTS	ARRAY_SIZE(ep_info)

/*-------------------------------------------------------------------------*/

/*
 * Size in bytes of dummy.fifo_buf; dummy_queue() only takes its emulated
 * FIFO path for IN requests whose length does not exceed this.
 */
#define FIFO_SIZE		64

/* Host-side bookkeeping wrapped around one URB. */
struct urbp {
	struct urb		*urb;
	struct list_head	urbp_list;
	struct sg_mapping_iter	miter;
	u32			miter_started;
};


/* Emulated root-hub state. */
enum dummy_rh_state {
	DUMMY_RH_RESET,
	DUMMY_RH_SUSPENDED,
	DUMMY_RH_RUNNING
};

/* Host-controller side of the emulation. */
struct dummy_hcd {
	struct dummy			*dum;	/* shared gadget/host state */
	enum dummy_rh_state		rh_state;
	struct timer_list		timer;
	/* current port status word, recomputed by set_link_state() */
	u32				port_status;
	/* port_status as it was at the end of the last set_link_state() */
	u32				old_status;
	/* dummy_wakeup() sets this to jiffies + 20 ms when it starts a resume */
	unsigned long			re_timeout;

	struct usb_device		*udev;
	struct list_head		urbp_list;
	struct urbp			*next_frame_urbp;

	u32				stream_en_ep;
	u8				num_stream[30 / 2];

	/* active/old_active: current and previous value, see set_link_state() */
	unsigned			active:1;
	unsigned			old_active:1;
	/* set by dummy_wakeup(), cleared by set_link_state() */
	unsigned			resuming:1;
};

/* State shared by the gadget side and the host controller(s). */
struct dummy {
	spinlock_t			lock;

	/*
	 * DEVICE/GADGET side support
	 */
	struct dummy_ep			ep[DUMMY_ENDPOINTS];
	/* reset to 0 by stop_activity() */
	int				address;
	/* raised while a gadget-driver callback runs with the lock dropped */
	int				callback_usage;
	struct usb_gadget		gadget;
	struct usb_gadget_driver	*driver;
	/* request and bounce buffer backing dummy_queue()'s emulated FIFO */
	struct dummy_request		fifo_req;
	u8				fifo_buf[FIFO_SIZE];
	/* device status bits, e.g. (1 << USB_DEVICE_SELF_POWERED) */
	u16				devstatus;
	/* gates the driver callbacks issued from set_link_state() */
	unsigned			ints_enabled:1;
	unsigned			udc_suspended:1;
	/* written by dummy_pullup() */
	unsigned			pullup:1;

	/*
	 * HOST side support
	 */
	struct dummy_hcd
*hs_hcd; struct dummy_hcd *ss_hcd; }; static inline struct dummy_hcd *hcd_to_dummy_hcd(struct usb_hcd *hcd) { return (struct dummy_hcd *) (hcd->hcd_priv); } static inline struct usb_hcd *dummy_hcd_to_hcd(struct dummy_hcd *dum) { return container_of((void *) dum, struct usb_hcd, hcd_priv); } static inline struct device *dummy_dev(struct dummy_hcd *dum) { return dummy_hcd_to_hcd(dum)->self.controller; } static inline struct device *udc_dev(struct dummy *dum) { return dum->gadget.dev.parent; } static inline struct dummy *ep_to_dummy(struct dummy_ep *ep) { return container_of(ep->gadget, struct dummy, gadget); } static inline struct dummy_hcd *gadget_to_dummy_hcd(struct usb_gadget *gadget) { struct dummy *dum = container_of(gadget, struct dummy, gadget); if (dum->gadget.speed == USB_SPEED_SUPER) return dum->ss_hcd; else return dum->hs_hcd; } static inline struct dummy *gadget_dev_to_dummy(struct device *dev) { return container_of(dev, struct dummy, gadget.dev); } /*-------------------------------------------------------------------------*/ /* DEVICE/GADGET SIDE UTILITY ROUTINES */ /* called with spinlock held */ static void nuke(struct dummy *dum, struct dummy_ep *ep) { while (!list_empty(&ep->queue)) { struct dummy_request *req; req = list_entry(ep->queue.next, struct dummy_request, queue); list_del_init(&req->queue); req->req.status = -ESHUTDOWN; spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); } } /* caller must hold lock */ static void stop_activity(struct dummy *dum) { int i; /* prevent any more requests */ dum->address = 0; /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ for (i = 0; i < DUMMY_ENDPOINTS; ++i) nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } /** * set_link_state_by_speed() - Sets the current state of the link according to * the hcd speed * @dum_hcd: pointer to the dummy_hcd structure to 
update the link state for
 *
 * This function updates the port_status according to the link state and the
 * speed of the hcd.
 */
static void set_link_state_by_speed(struct dummy_hcd *dum_hcd)
{
	struct dummy *dum = dum_hcd->dum;
	const bool is_usb3 = dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3;
	const bool was_connected =
		(dum_hcd->old_status & USB_PORT_STAT_CONNECTION) != 0;
	/* UDC suspend must cause a disconnect, same as a dropped pullup */
	const bool detached = !dum->pullup || dum->udc_suspended;
	const u32 conn_change = USB_PORT_STAT_C_CONNECTION << 16;

	if (is_usb3) {
		/* unpowered SuperSpeed port: nothing else can be set */
		if (!(dum_hcd->port_status & USB_SS_PORT_STAT_POWER)) {
			dum_hcd->port_status = 0;
			return;
		}

		if (detached) {
			dum_hcd->port_status &= ~(USB_PORT_STAT_CONNECTION |
						  USB_PORT_STAT_ENABLE);
			if (was_connected)
				dum_hcd->port_status |= conn_change;
			return;
		}

		/* device is connected and not suspended */
		dum_hcd->port_status |= USB_PORT_STAT_CONNECTION |
					USB_PORT_STAT_SPEED_5GBPS;
		if (!was_connected)
			dum_hcd->port_status |= conn_change;
		if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) &&
		    (dum_hcd->port_status & USB_PORT_STAT_LINK_STATE) ==
				USB_SS_PORT_LS_U0 &&
		    dum_hcd->rh_state != DUMMY_RH_SUSPENDED)
			dum_hcd->active = 1;
		return;
	}

	/* non-SuperSpeed root hub from here on */
	if (!(dum_hcd->port_status & USB_PORT_STAT_POWER)) {
		dum_hcd->port_status = 0;
		return;
	}

	if (detached) {
		dum_hcd->port_status &= ~(USB_PORT_STAT_CONNECTION |
					  USB_PORT_STAT_ENABLE |
					  USB_PORT_STAT_LOW_SPEED |
					  USB_PORT_STAT_HIGH_SPEED |
					  USB_PORT_STAT_SUSPEND);
		if (was_connected)
			dum_hcd->port_status |= conn_change;
		return;
	}

	dum_hcd->port_status |= USB_PORT_STAT_CONNECTION;
	if (!was_connected)
		dum_hcd->port_status |= conn_change;

	if (!(dum_hcd->port_status & USB_PORT_STAT_ENABLE)) {
		/* a port that is not enabled cannot be suspended */
		dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND;
		return;
	}
	if (!(dum_hcd->port_status & USB_PORT_STAT_SUSPEND) &&
	    dum_hcd->rh_state != DUMMY_RH_SUSPENDED)
		dum_hcd->active = 1;
}

/* caller must hold lock */
static
void set_link_state(struct dummy_hcd *dum_hcd)
	__must_hold(&dum->lock)
{
	/*
	 * Recompute the emulated port state for @dum_hcd and tell the gadget
	 * driver about the resulting reset, disconnect, suspend or resume.
	 *
	 * dum->lock is dropped around every driver callback and re-taken
	 * afterwards; dum->callback_usage is raised for the duration.
	 */
	struct dummy *dum = dum_hcd->dum;
	unsigned int power_bit;

	dum_hcd->active = 0;
	/*
	 * With the pullup on, only the root hub whose speed class matches
	 * the gadget speed is updated; for the other one we stop here, with
	 * nothing changed except ->active having been cleared above.
	 */
	if (dum->pullup)
		if ((dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 &&
		     dum->gadget.speed != USB_SPEED_SUPER) ||
		    (dummy_hcd_to_hcd(dum_hcd)->speed != HCD_USB3 &&
		     dum->gadget.speed == USB_SPEED_SUPER))
			return;

	set_link_state_by_speed(dum_hcd);
	/* the "port powered" bit differs between the USB3 and USB2 hubs */
	power_bit = (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 ?
			USB_SS_PORT_STAT_POWER : USB_PORT_STAT_POWER);

	/* a resume is over once the port is disabled or the link is active */
	if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0 ||
	     dum_hcd->active)
		dum_hcd->resuming = 0;

	/* Currently !connected or in reset */
	if ((dum_hcd->port_status & power_bit) == 0 ||
			(dum_hcd->port_status & USB_PORT_STAT_RESET) != 0) {
		/* power bit was set in old_status and is clear now */
		unsigned int disconnect = power_bit &
				dum_hcd->old_status & (~dum_hcd->port_status);
		/* reset bit was clear in old_status and is set now */
		unsigned int reset = USB_PORT_STAT_RESET &
				(~dum_hcd->old_status) & dum_hcd->port_status;

		/* Report reset and disconnect events to the driver */
		if (dum->ints_enabled && (disconnect || reset)) {
			stop_activity(dum);
			++dum->callback_usage;
			spin_unlock(&dum->lock);
			if (reset)
				usb_gadget_udc_reset(&dum->gadget, dum->driver);
			else
				dum->driver->disconnect(&dum->gadget);
			spin_lock(&dum->lock);
			--dum->callback_usage;
		}
	} else if (dum_hcd->active != dum_hcd->old_active &&
			dum->ints_enabled) {
		/* active -> inactive is a suspend, inactive -> active a resume */
		++dum->callback_usage;
		spin_unlock(&dum->lock);
		if (dum_hcd->old_active && dum->driver->suspend)
			dum->driver->suspend(&dum->gadget);
		else if (!dum_hcd->old_active && dum->driver->resume)
			dum->driver->resume(&dum->gadget);
		spin_lock(&dum->lock);
		--dum->callback_usage;
	}

	/* remember this state so the next call can detect transitions */
	dum_hcd->old_status = dum_hcd->port_status;
	dum_hcd->old_active = dum_hcd->active;
}

/*-------------------------------------------------------------------------*/

/* DEVICE/GADGET SIDE DRIVER
 *
 * This only tracks gadget state. All the work is done when the host
 * side tries some (emulated) i/o operation. Real device controller
 * drivers would do real i/o using dma, fifos, irqs, timers, etc.
*/

/* Non-zero when the port-enable bit is set in @dum's port_status. */
#define is_enabled(dum) \
	(dum->port_status & USB_PORT_STAT_ENABLE)

/*
 * dummy_enable() - configure endpoint @_ep according to @desc.
 *
 * Installed as the .enable hook of dummy_ep_ops.
 *
 * Returns 0 on success; -EINVAL for a NULL argument, an endpoint that is
 * already enabled, ep0, a descriptor that is not an endpoint descriptor, or
 * a transfer type / max packet size this endpoint cannot take at the current
 * gadget speed; -ESHUTDOWN when no gadget driver is bound or the port is not
 * enabled.
 */
static int dummy_enable(struct usb_ep *_ep,
		const struct usb_endpoint_descriptor *desc)
{
	struct dummy		*dum;
	struct dummy_hcd	*dum_hcd;
	struct dummy_ep		*ep;
	unsigned		max;
	int			retval;

	/* container_of() is only pointer arithmetic; _ep is checked below */
	ep = usb_ep_to_dummy_ep(_ep);
	if (!_ep || !desc || ep->desc || _ep->name == ep0name
			|| desc->bDescriptorType != USB_DT_ENDPOINT)
		return -EINVAL;
	dum = ep_to_dummy(ep);
	if (!dum->driver)
		return -ESHUTDOWN;

	dum_hcd = gadget_to_dummy_hcd(&dum->gadget);
	if (!is_enabled(dum_hcd))
		return -ESHUTDOWN;

	/*
	 * For HS/FS devices only bits 0..10 of the wMaxPacketSize represent the
	 * maximum packet size.
	 * For SS devices the wMaxPacketSize is limited by 1024.
	 */
	max = usb_endpoint_maxp(desc);

	/* drivers must not request bad settings, since lower levels
	 * (hardware or its drivers) may not check. some endpoints
	 * can't do iso, many have maxpacket limitations, etc.
	 *
	 * since this "hardware" driver is here to help debugging, we
	 * have some extra sanity checks. (there could be more though,
	 * especially for "ep9out" style fixed function ones.)
	 */
	retval = -EINVAL;
	switch (usb_endpoint_type(desc)) {
	case USB_ENDPOINT_XFER_BULK:
		/* fixed-function iso/int endpoints cannot carry bulk */
		if (strstr(ep->ep.name, "-iso")
				|| strstr(ep->ep.name, "-int")) {
			goto done;
		}
		/* bulk needs one exact size per speed (any legal one at FS) */
		switch (dum->gadget.speed) {
		case USB_SPEED_SUPER:
			if (max == 1024)
				break;
			goto done;
		case USB_SPEED_HIGH:
			if (max == 512)
				break;
			goto done;
		case USB_SPEED_FULL:
			if (max == 8 || max == 16 || max == 32 || max == 64)
				/* we'll fake any legal size */
				break;
			/* save a return statement */
			fallthrough;
		default:
			goto done;
		}
		break;
	case USB_ENDPOINT_XFER_INT:
		if (strstr(ep->ep.name, "-iso")) /* bulk is ok */
			goto done;
		/* real hardware might not handle all packet sizes */
		/*
		 * The cases chain through fallthrough, so a size that fits a
		 * smaller limit further down is accepted as well.
		 */
		switch (dum->gadget.speed) {
		case USB_SPEED_SUPER:
		case USB_SPEED_HIGH:
			if (max <= 1024)
				break;
			/* save a return statement */
			fallthrough;
		case USB_SPEED_FULL:
			if (max <= 64)
				break;
			/* save a return statement */
			fallthrough;
		default:
			if (max <= 8)
				break;
			goto done;
		}
		break;
	case USB_ENDPOINT_XFER_ISOC:
		if (strstr(ep->ep.name, "-bulk")
				|| strstr(ep->ep.name, "-int"))
			goto done;
		/* real hardware might not handle all packet sizes */
		switch (dum->gadget.speed) {
		case USB_SPEED_SUPER:
		case USB_SPEED_HIGH:
			if (max <= 1024)
				break;
			/* save a return statement */
			fallthrough;
		case USB_SPEED_FULL:
			if (max <= 1023)
				break;
			/* save a return statement */
			fallthrough;
		default:
			goto done;
		}
		break;
	default:
		/* few chips support control except on ep0 */
		goto done;
	}

	_ep->maxpacket = max;
	if (usb_ss_max_streams(_ep->comp_desc)) {
		if (!usb_endpoint_xfer_bulk(desc)) {
			/*
			 * NOTE(review): this path returns with _ep->maxpacket
			 * already updated although the enable failed.
			 */
			dev_err(udc_dev(dum), "Can't enable stream support on "
					"non-bulk ep %s\n", _ep->name);
			return -EINVAL;
		}
		ep->stream_en = 1;
	}
	ep->desc = desc;

	dev_dbg(udc_dev(dum), "enabled %s (ep%d%s-%s) maxpacket %d stream %s\n",
		_ep->name,
		desc->bEndpointAddress & 0x0f,
		(desc->bEndpointAddress & USB_DIR_IN) ? "in" : "out",
		usb_ep_type_string(usb_endpoint_type(desc)),
		max, ep->stream_en ? "enabled" : "disabled");

	/* at this point real hardware should be NAKing transfers
	 * to that endpoint, until a buffer is queued to it.
	 */
	ep->halted = ep->wedged = 0;
	retval = 0;
done:
	return retval;
}

/*
 * dummy_disable() - undo dummy_enable() for @_ep.
 *
 * Clears the descriptor and stream flag and fails all queued requests via
 * nuke(), all under dum->lock with interrupts saved.  Returns -EINVAL for a
 * NULL endpoint, one that is not enabled, or ep0; 0 otherwise.
 */
static int dummy_disable(struct usb_ep *_ep)
{
	struct dummy_ep		*ep;
	struct dummy		*dum;
	unsigned long		flags;

	ep = usb_ep_to_dummy_ep(_ep);
	if (!_ep || !ep->desc || _ep->name == ep0name)
		return -EINVAL;
	dum = ep_to_dummy(ep);

	spin_lock_irqsave(&dum->lock, flags);
	ep->desc = NULL;
	ep->stream_en = 0;
	nuke(dum, ep);
	spin_unlock_irqrestore(&dum->lock, flags);

	dev_dbg(udc_dev(dum), "disabled %s\n", _ep->name);
	return 0;
}

/*
 * Allocate a zeroed dummy_request with @mem_flags and hand back the
 * usb_request embedded in it.  Returns NULL when @_ep is NULL or the
 * allocation fails.
 */
static struct usb_request *dummy_alloc_request(struct usb_ep *_ep,
		gfp_t mem_flags)
{
	struct dummy_request	*req;

	if (!_ep)
		return NULL;

	req = kzalloc(sizeof(*req), mem_flags);
	if (!req)
		return NULL;
	INIT_LIST_HEAD(&req->queue);
	return &req->req;
}

/*
 * Free a request obtained from dummy_alloc_request().  Warns and does
 * nothing on NULL arguments; also warns if the request is still queued.
 */
static void dummy_free_request(struct usb_ep *_ep, struct usb_request *_req)
{
	struct dummy_request	*req;

	if (!_ep || !_req) {
		WARN_ON(1);
		return;
	}

	req = usb_request_to_dummy_request(_req);
	WARN_ON(!list_empty(&req->queue));
	kfree(req);
}

/* Intentionally empty completion; dummy_queue() installs it on dum->fifo_req. */
static void fifo_complete(struct usb_ep *ep, struct usb_request *req)
{
}

/*
 * dummy_queue() - queue @_req on @_ep.
 *
 * Returns -EINVAL when the request is NULL, already queued or has no
 * completion callback, or when the endpoint is NULL or (other than ep0) not
 * enabled; -ESHUTDOWN when no gadget driver is bound or the port is not
 * enabled; 0 once the request has been queued.
 */
static int dummy_queue(struct usb_ep *_ep, struct usb_request *_req,
		gfp_t mem_flags)
{
	struct dummy_ep		*ep;
	struct dummy_request	*req;
	struct dummy		*dum;
	struct dummy_hcd	*dum_hcd;
	unsigned long		flags;

	req = usb_request_to_dummy_request(_req);
	if (!_req || !list_empty(&req->queue) || !_req->complete)
		return -EINVAL;

	ep = usb_ep_to_dummy_ep(_ep);
	if (!_ep || (!ep->desc && _ep->name != ep0name))
		return -EINVAL;

	dum = ep_to_dummy(ep);
	dum_hcd = gadget_to_dummy_hcd(&dum->gadget);
	if (!dum->driver || !is_enabled(dum_hcd))
		return -ESHUTDOWN;

#if 0
	dev_dbg(udc_dev(dum), "ep %p queue req %p to %s, len %d buf %p\n",
			ep, _req, _ep->name, _req->length, _req->buf);
#endif
	_req->status = -EINPROGRESS;
	_req->actual = 0;
	spin_lock_irqsave(&dum->lock, flags);

	/* implement an emulated single-request FIFO */
	if (ep->desc
&& (ep->desc->bEndpointAddress & USB_DIR_IN) &&
			list_empty(&dum->fifo_req.queue) &&
			list_empty(&ep->queue) &&
			_req->length <= FIFO_SIZE) {
		/*
		 * Small IN request, with both the FIFO request and this
		 * endpoint's queue idle: copy it into dum->fifo_req and
		 * dum->fifo_buf, queue that copy, and complete the caller's
		 * request right away.
		 */
		req = &dum->fifo_req;
		req->req = *_req;
		req->req.buf = dum->fifo_buf;
		memcpy(dum->fifo_buf, _req->buf, _req->length);
		req->req.context = dum;
		req->req.complete = fifo_complete;

		list_add_tail(&req->queue, &ep->queue);
		/* the completion callback runs with the lock dropped */
		spin_unlock(&dum->lock);
		_req->actual = _req->length;
		_req->status = 0;
		usb_gadget_giveback_request(_ep, _req);
		spin_lock(&dum->lock);
	} else
		list_add_tail(&req->queue, &ep->queue);
	spin_unlock_irqrestore(&dum->lock, flags);

	/* real hardware would likely enable transfers here, in case
	 * it'd been left NAKing.
	 */
	return 0;
}

/*
 * dummy_dequeue() - remove @_req from @_ep's queue and complete it with
 * -ECONNRESET.
 *
 * Returns 0 when the request was found; -EINVAL for NULL arguments or a
 * request that is not on this endpoint's queue; -ESHUTDOWN when no gadget
 * driver is bound.
 */
static int dummy_dequeue(struct usb_ep *_ep, struct usb_request *_req)
{
	struct dummy_ep		*ep;
	struct dummy		*dum;
	int			retval = -EINVAL;
	unsigned long		flags;
	struct dummy_request	*req = NULL, *iter;

	if (!_ep || !_req)
		return retval;
	ep = usb_ep_to_dummy_ep(_ep);
	dum = ep_to_dummy(ep);

	if (!dum->driver)
		return -ESHUTDOWN;

	/*
	 * Interrupts are saved separately from taking the lock: they stay
	 * off across the giveback below, which runs after the unlock.
	 */
	local_irq_save(flags);
	spin_lock(&dum->lock);
	list_for_each_entry(iter, &ep->queue, queue) {
		if (&iter->req != _req)
			continue;
		list_del_init(&iter->queue);
		_req->status = -ECONNRESET;
		req = iter;
		retval = 0;
		break;
	}
	spin_unlock(&dum->lock);

	if (retval == 0) {
		dev_dbg(udc_dev(dum),
				"dequeued req %p from %s, len %d buf %p\n",
				req, _ep->name, _req->length, _req->buf);
		usb_gadget_giveback_request(_ep, _req);
	}
	local_irq_restore(flags);
	return retval;
}

/*
 * Set (@value != 0) or clear (@value == 0) the halt state of @_ep; a
 * non-zero @wedged additionally marks the endpoint wedged when halting.
 *
 * Returns -EINVAL for a NULL endpoint, -ESHUTDOWN when no gadget driver is
 * bound, -EAGAIN when asked to halt an enabled IN endpoint that still has
 * requests queued, and 0 otherwise.
 *
 * NOTE(review): no lock is taken here while the endpoint flags are changed.
 */
static int
dummy_set_halt_and_wedge(struct usb_ep *_ep, int value, int wedged)
{
	struct dummy_ep		*ep;
	struct dummy		*dum;

	if (!_ep)
		return -EINVAL;
	ep = usb_ep_to_dummy_ep(_ep);
	dum = ep_to_dummy(ep);
	if (!dum->driver)
		return -ESHUTDOWN;
	if (!value)
		ep->halted = ep->wedged = 0;
	else if (ep->desc && (ep->desc->bEndpointAddress & USB_DIR_IN) &&
			!list_empty(&ep->queue))
		return -EAGAIN;
	else {
		ep->halted = 1;
		if (wedged)
			ep->wedged = 1;
	}
	/* FIXME clear emulated data toggle too */
	return 0;
}

static int
dummy_set_halt(struct usb_ep *_ep, int
value) { return dummy_set_halt_and_wedge(_ep, value, 0); } static int dummy_set_wedge(struct usb_ep *_ep) { if (!_ep || _ep->name == ep0name) return -EINVAL; return dummy_set_halt_and_wedge(_ep, 1, 1); } static const struct usb_ep_ops dummy_ep_ops = { .enable = dummy_enable, .disable = dummy_disable, .alloc_request = dummy_alloc_request, .free_request = dummy_free_request, .queue = dummy_queue, .dequeue = dummy_dequeue, .set_halt = dummy_set_halt, .set_wedge = dummy_set_wedge, }; /*-------------------------------------------------------------------------*/ /* there are both host and device side versions of this call ... */ static int dummy_g_get_frame(struct usb_gadget *_gadget) { struct timespec64 ts64; ktime_get_ts64(&ts64); return ts64.tv_nsec / NSEC_PER_MSEC; } static int dummy_wakeup(struct usb_gadget *_gadget) { struct dummy_hcd *dum_hcd; dum_hcd = gadget_to_dummy_hcd(_gadget); if (!(dum_hcd->dum->devstatus & ((1 << USB_DEVICE_B_HNP_ENABLE) | (1 << USB_DEVICE_REMOTE_WAKEUP)))) return -EINVAL; if ((dum_hcd->port_status & USB_PORT_STAT_CONNECTION) == 0) return -ENOLINK; if ((dum_hcd->port_status & USB_PORT_STAT_SUSPEND) == 0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) return -EIO; /* FIXME: What if the root hub is suspended but the port isn't? 
*/ /* hub notices our request, issues downstream resume, etc */ dum_hcd->resuming = 1; dum_hcd->re_timeout = jiffies + msecs_to_jiffies(20); mod_timer(&dummy_hcd_to_hcd(dum_hcd)->rh_timer, dum_hcd->re_timeout); return 0; } static int dummy_set_selfpowered(struct usb_gadget *_gadget, int value) { struct dummy *dum; _gadget->is_selfpowered = (value != 0); dum = gadget_to_dummy_hcd(_gadget)->dum; if (value) dum->devstatus |= (1 << USB_DEVICE_SELF_POWERED); else dum->devstatus &= ~(1 << USB_DEVICE_SELF_POWERED); return 0; } static void dummy_udc_update_ep0(struct dummy *dum) { if (dum->gadget.speed == USB_SPEED_SUPER) dum->ep[0].ep.maxpacket = 9; else dum->ep[0].ep.maxpacket = 64; } static int dummy_pullup(struct usb_gadget *_gadget, int value) { struct dummy_hcd *dum_hcd; struct dummy *dum; unsigned long flags; dum = gadget_dev_to_dummy(&_gadget->dev); dum_hcd = gadget_to_dummy_hcd(_gadget); spin_lock_irqsave(&dum->lock, flags); dum->pullup = (value != 0); set_link_state(dum_hcd); if (value == 0) { /* * Emulate synchronize_irq(): wait for callbacks to finish. * This seems to be the best place to emulate the call to * synchronize_irq() that's in usb_gadget_remove_driver(). * Doing it in dummy_udc_stop() would be too late since it * is called after the unbind callback and unbind shouldn't * be invoked until all the other callbacks are finished. 
*/ while (dum->callback_usage > 0) { spin_unlock_irqrestore(&dum->lock, flags); usleep_range(1000, 2000); spin_lock_irqsave(&dum->lock, flags); } } spin_unlock_irqrestore(&dum->lock, flags); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static void dummy_udc_set_speed(struct usb_gadget *_gadget, enum usb_device_speed speed) { struct dummy *dum; dum = gadget_dev_to_dummy(&_gadget->dev); dum->gadget.speed = speed; dummy_udc_update_ep0(dum); } static void dummy_udc_async_callbacks(struct usb_gadget *_gadget, bool enable) { struct dummy *dum = gadget_dev_to_dummy(&_gadget->dev); spin_lock_irq(&dum->lock); dum->ints_enabled = enable; spin_unlock_irq(&dum->lock); } static int dummy_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver); static int dummy_udc_stop(struct usb_gadget *g); static const struct usb_gadget_ops dummy_ops = { .get_frame = dummy_g_get_frame, .wakeup = dummy_wakeup, .set_selfpowered = dummy_set_selfpowered, .pullup = dummy_pullup, .udc_start = dummy_udc_start, .udc_stop = dummy_udc_stop, .udc_set_speed = dummy_udc_set_speed, .udc_async_callbacks = dummy_udc_async_callbacks, }; /*-------------------------------------------------------------------------*/ /* "function" sysfs attribute */ static ssize_t function_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dummy *dum = gadget_dev_to_dummy(dev); if (!dum->driver || !dum->driver->function) return 0; return scnprintf(buf, PAGE_SIZE, "%s\n", dum->driver->function); } static DEVICE_ATTR_RO(function); /*-------------------------------------------------------------------------*/ /* * Driver registration/unregistration. * * This is basically hardware-specific; there's usually only one real USB * device (not host) controller since that's how USB devices are intended * to work. So most implementations of these api calls will rely on the * fact that only one driver will ever bind to the hardware. 
But curious * hardware can be built with discrete components, so the gadget API doesn't * require that assumption. * * For this emulator, it might be convenient to create a usb device * for each driver that registers: just add to a big root hub. */ static int dummy_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; switch (g->speed) { /* All the speeds we support */ case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: break; default: dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", driver->max_speed); return -EINVAL; } /* * DEVICE side init ... the layer above hardware, which * can't enumerate without help from the driver we're binding. */ spin_lock_irq(&dum->lock); dum->devstatus = 0; dum->driver = driver; spin_unlock_irq(&dum->lock); return 0; } static int dummy_udc_stop(struct usb_gadget *g) { struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; spin_lock_irq(&dum->lock); dum->ints_enabled = 0; stop_activity(dum); dum->driver = NULL; spin_unlock_irq(&dum->lock); return 0; } #undef is_enabled /* The gadget structure is stored inside the hcd structure and will be * released along with it. 
*/ static void init_dummy_udc_hw(struct dummy *dum) { int i; INIT_LIST_HEAD(&dum->gadget.ep_list); for (i = 0; i < DUMMY_ENDPOINTS; i++) { struct dummy_ep *ep = &dum->ep[i]; if (!ep_info[i].name) break; ep->ep.name = ep_info[i].name; ep->ep.caps = ep_info[i].caps; ep->ep.ops = &dummy_ep_ops; list_add_tail(&ep->ep.ep_list, &dum->gadget.ep_list); ep->halted = ep->wedged = ep->already_seen = ep->setup_stage = 0; usb_ep_set_maxpacket_limit(&ep->ep, ~0); ep->ep.max_streams = 16; ep->last_io = jiffies; ep->gadget = &dum->gadget; ep->desc = NULL; INIT_LIST_HEAD(&ep->queue); } dum->gadget.ep0 = &dum->ep[0].ep; list_del_init(&dum->ep[0].ep.ep_list); INIT_LIST_HEAD(&dum->fifo_req.queue); #ifdef CONFIG_USB_OTG dum->gadget.is_otg = 1; #endif } static int dummy_udc_probe(struct platform_device *pdev) { struct dummy *dum; int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); /* Clear usb_gadget region for new registration to udc-core */ memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; if (mod_data.is_super_speed) dum->gadget.max_speed = USB_SPEED_SUPER; else if (mod_data.is_high_speed) dum->gadget.max_speed = USB_SPEED_HIGH; else dum->gadget.max_speed = USB_SPEED_FULL; dum->gadget.dev.parent = &pdev->dev; init_dummy_udc_hw(dum); rc = usb_add_gadget_udc(&pdev->dev, &dum->gadget); if (rc < 0) goto err_udc; rc = device_create_file(&dum->gadget.dev, &dev_attr_function); if (rc < 0) goto err_dev; platform_set_drvdata(pdev, dum); return rc; err_dev: usb_del_gadget_udc(&dum->gadget); err_udc: return rc; } static void dummy_udc_remove(struct platform_device *pdev) { struct dummy *dum = platform_get_drvdata(pdev); device_remove_file(&dum->gadget.dev, &dev_attr_function); usb_del_gadget_udc(&dum->gadget); } static void dummy_udc_pm(struct dummy *dum, struct dummy_hcd *dum_hcd, int suspend) { spin_lock_irq(&dum->lock); dum->udc_suspended = suspend; set_link_state(dum_hcd); spin_unlock_irq(&dum->lock); } static int 
dummy_udc_suspend(struct platform_device *pdev, pm_message_t state) { struct dummy *dum = platform_get_drvdata(pdev); struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(&dum->gadget); dev_dbg(&pdev->dev, "%s\n", __func__); dummy_udc_pm(dum, dum_hcd, 1); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static int dummy_udc_resume(struct platform_device *pdev) { struct dummy *dum = platform_get_drvdata(pdev); struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(&dum->gadget); dev_dbg(&pdev->dev, "%s\n", __func__); dummy_udc_pm(dum, dum_hcd, 0); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static struct platform_driver dummy_udc_driver = { .probe = dummy_udc_probe, .remove_new = dummy_udc_remove, .suspend = dummy_udc_suspend, .resume = dummy_udc_resume, .driver = { .name = gadget_name, }, }; /*-------------------------------------------------------------------------*/ static unsigned int dummy_get_ep_idx(const struct usb_endpoint_descriptor *desc) { unsigned int index; index = usb_endpoint_num(desc) << 1; if (usb_endpoint_dir_in(desc)) index |= 1; return index; } /* HOST SIDE DRIVER * * this uses the hcd framework to hook up to host side drivers. * its root hub will only have one device, otherwise it acts like * a normal host controller. * * when urbs are queued, they're just stuck on a list that we * scan in a timer callback. that callback connects writes from * the host with reads from the device, and so on, based on the * usb 2.0 rules. */ static int dummy_ep_stream_en(struct dummy_hcd *dum_hcd, struct urb *urb) { const struct usb_endpoint_descriptor *desc = &urb->ep->desc; u32 index; if (!usb_endpoint_xfer_bulk(desc)) return 0; index = dummy_get_ep_idx(desc); return (1 << index) & dum_hcd->stream_en_ep; } /* * The max stream number is saved as a nibble so for the 30 possible endpoints * we only 15 bytes of memory. Therefore we are limited to max 16 streams (0 * means we use only 1 stream). 
The maximum according to the spec is 16bit so
 * if the 16 stream limit is about to go, the array size should be incremented
 * to 30 elements of type u16.
 */
/*
 * Read the stored stream count for @pipe: the high nibble of
 * num_stream[endpoint] for OUT pipes, the low nibble for IN pipes, plus one.
 */
static int get_max_streams_for_pipe(struct dummy_hcd *dum_hcd,
		unsigned int pipe)
{
	int max_streams;

	max_streams = dum_hcd->num_stream[usb_pipeendpoint(pipe)];
	if (usb_pipeout(pipe))
		max_streams >>= 4;
	else
		max_streams &= 0xf;
	max_streams++;
	return max_streams;
}

/*
 * Store (@streams - 1) in the nibble of num_stream[endpoint] that belongs
 * to @pipe's direction (high nibble for OUT, low nibble for IN), leaving the
 * other nibble untouched.
 */
static void set_max_streams_for_pipe(struct dummy_hcd *dum_hcd,
		unsigned int pipe, unsigned int streams)
{
	int max_streams;

	streams--;
	max_streams = dum_hcd->num_stream[usb_pipeendpoint(pipe)];
	if (usb_pipeout(pipe)) {
		streams <<= 4;
		max_streams &= 0xf;
	} else {
		max_streams &= 0xf0;
	}
	max_streams |= streams;
	dum_hcd->num_stream[usb_pipeendpoint(pipe)] = max_streams;
}

/*
 * dummy_validate_stream() - check @urb's stream_id against the endpoint's
 * stream configuration.
 *
 * Returns 0 when stream_id is zero and streams are disabled, or when
 * stream_id is non-zero, streams are enabled and the id does not exceed the
 * stored maximum.  Returns -EINVAL when stream_id and the enabled state
 * disagree.  An id above the maximum is logged and hits BUG().
 */
static int dummy_validate_stream(struct dummy_hcd *dum_hcd, struct urb *urb)
{
	unsigned int max_streams;
	int enabled;

	enabled = dummy_ep_stream_en(dum_hcd, urb);
	if (!urb->stream_id) {
		if (enabled)
			return -EINVAL;
		return 0;
	}
	if (!enabled)
		return -EINVAL;

	/*
	 * NOTE(review): get_max_streams_for_pipe() applies usb_pipeendpoint()
	 * and usb_pipeout() to its argument, but an endpoint number (not a
	 * pipe) is passed here.  Confirm against the code that calls
	 * set_max_streams_for_pipe() before changing either side.
	 */
	max_streams = get_max_streams_for_pipe(dum_hcd,
			usb_pipeendpoint(urb->pipe));
	if (urb->stream_id > max_streams) {
		dev_err(dummy_dev(dum_hcd), "Stream id %d is out of range.\n",
				urb->stream_id);
		BUG();
		return -EINVAL;
	}
	return 0;
}

/*
 * dummy_urb_enqueue() - queue @urb for processing by dummy_timer().
 *
 * Allocates a struct urbp wrapper, validates the stream id, links the URB to
 * its endpoint and appends the wrapper to dum_hcd->urbp_list.  The first URB
 * seen also pins urb->dev in dum_hcd->udev.  Control URBs are flagged as new
 * through urb->error_count.  The timer is armed if it is not already pending.
 *
 * Returns 0 on success, -ENOMEM if the wrapper cannot be allocated, or the
 * error from stream validation / usb_hcd_link_urb_to_ep() (the wrapper is
 * freed in those cases).
 */
static int dummy_urb_enqueue(
	struct usb_hcd			*hcd,
	struct urb			*urb,
	gfp_t				mem_flags
) {
	struct dummy_hcd *dum_hcd;
	struct urbp	*urbp;
	unsigned long	flags;
	int		rc;

	urbp = kmalloc(sizeof *urbp, mem_flags);
	if (!urbp)
		return -ENOMEM;
	urbp->urb = urb;
	urbp->miter_started = 0;

	dum_hcd = hcd_to_dummy_hcd(hcd);
	spin_lock_irqsave(&dum_hcd->dum->lock, flags);

	rc = dummy_validate_stream(dum_hcd, urb);
	if (rc) {
		kfree(urbp);
		goto done;
	}

	rc = usb_hcd_link_urb_to_ep(hcd, urb);
	if (rc) {
		kfree(urbp);
		goto done;
	}

	if (!dum_hcd->udev) {
		dum_hcd->udev = urb->dev;
		usb_get_dev(dum_hcd->udev);
	} else if (unlikely(dum_hcd->udev != urb->dev))
		dev_err(dummy_dev(dum_hcd), "usb_device address has changed!\n");

	list_add_tail(&urbp->urbp_list, &dum_hcd->urbp_list);
	urb->hcpriv = urbp;
	if (!dum_hcd->next_frame_urbp)
		dum_hcd->next_frame_urbp = urbp;
	if (usb_pipetype(urb->pipe) == PIPE_CONTROL)
		urb->error_count = 1;		/* mark as a new urb */

	/* kick the scheduler, it'll do the rest */
	if (!timer_pending(&dum_hcd->timer))
		mod_timer(&dum_hcd->timer, jiffies + 1);

 done:
	spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
	return rc;
}

/*
 * dummy_urb_dequeue() - request cancellation of @urb.
 *
 * Only marks the URB through usb_hcd_check_unlink_urb(); the actual giveback
 * is done by dummy_timer().  When the root hub is not running and URBs are
 * still queued, the timer is fired immediately so the giveback happens.
 * Returns the result of usb_hcd_check_unlink_urb().
 */
static int dummy_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
{
	struct dummy_hcd *dum_hcd;
	unsigned long	flags;
	int		rc;

	/* giveback happens automatically in timer callback,
	 * so make sure the callback happens */
	dum_hcd = hcd_to_dummy_hcd(hcd);
	spin_lock_irqsave(&dum_hcd->dum->lock, flags);

	rc = usb_hcd_check_unlink_urb(hcd, urb, status);
	if (!rc && dum_hcd->rh_state != DUMMY_RH_RUNNING &&
			!list_empty(&dum_hcd->urbp_list))
		mod_timer(&dum_hcd->timer, jiffies);

	spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
	return rc;
}

/*
 * dummy_perform_transfer() - copy @len bytes between @urb and @req.
 *
 * The direction follows usb_urb_dir_in(): device request buffer to URB for
 * IN, URB to request buffer for OUT.  URBs without a scatterlist use a single
 * memcpy() at the current offsets; otherwise the data is walked with the
 * sg mapping iterator kept in the URB's urbp.
 *
 * Returns the number of bytes copied, or -EINVAL (after a one-time warning)
 * if the scatterlist runs out before @len bytes have been handled.
 */
static int dummy_perform_transfer(struct urb *urb, struct dummy_request *req,
		u32 len)
{
	void *ubuf, *rbuf;
	struct urbp *urbp = urb->hcpriv;
	int to_host;
	struct sg_mapping_iter *miter = &urbp->miter;
	u32 trans = 0;
	u32 this_sg;
	bool next_sg;

	to_host = usb_urb_dir_in(urb);
	rbuf = req->req.buf + req->req.actual;

	if (!urb->num_sgs) {
		ubuf = urb->transfer_buffer + urb->actual_length;
		if (to_host)
			memcpy(ubuf, rbuf, len);
		else
			memcpy(rbuf, ubuf, len);
		return len;
	}

	/* The iterator is started once per URB and then resumed. */
	if (!urbp->miter_started) {
		u32 flags = SG_MITER_ATOMIC;

		if (to_host)
			flags |= SG_MITER_TO_SG;
		else
			flags |= SG_MITER_FROM_SG;

		sg_miter_start(miter, urb->sg, urb->num_sgs, flags);
		urbp->miter_started = 1;
	}
	next_sg = sg_miter_next(miter);
	if (next_sg == false) {
		WARN_ON_ONCE(1);
		return -EINVAL;
	}
	do {
		ubuf = miter->addr;
		this_sg = min_t(u32, len, miter->length);
		miter->consumed = this_sg;
		trans += this_sg;

		if (to_host)
			memcpy(ubuf, rbuf, this_sg);
		else
			memcpy(rbuf, ubuf, this_sg);
		len -= this_sg;

		if (!len)
			break;
		next_sg = sg_miter_next(miter);
		if (next_sg == false) {
			WARN_ON_ONCE(1);
			return -EINVAL;
		}

		rbuf += this_sg;
	} while (1);

	sg_miter_stop(miter);
	return trans;
}

/* transfer up to a frame's worth; caller must own lock */
/*
 * Matches @urb against the requests queued on @ep, moving at most @limit
 * bytes.  *@status is set once the host side of the transfer is finished;
 * completed device-side requests are given back with dum->lock temporarily
 * dropped.  Returns the number of bytes moved.
 */
static int transfer(struct dummy_hcd *dum_hcd, struct urb *urb,
		struct dummy_ep *ep, int limit, int *status)
{
	struct dummy		*dum = dum_hcd->dum;
	struct dummy_request	*req;
	int			sent = 0;

top:
	/* if there's no request queued, the device is NAKing; return */
	list_for_each_entry(req, &ep->queue, queue) {
		unsigned	host_len, dev_len, len;
		int		is_short, to_host;
		int		rescan = 0;

		if (dummy_ep_stream_en(dum_hcd, urb)) {
			if ((urb->stream_id != req->req.stream_id))
				continue;
		}

		/* 1..N packets of ep->ep.maxpacket each ... the last one
		 * may be short (including zero length).
		 *
		 * writer can send a zlp explicitly (length 0) or implicitly
		 * (length mod maxpacket zero, and 'zero' flag); they always
		 * terminate reads.
		 */
		host_len = urb->transfer_buffer_length - urb->actual_length;
		dev_len = req->req.length - req->req.actual;
		len = min(host_len, dev_len);

		/* FIXME update emulated data toggle too */

		to_host = usb_urb_dir_in(urb);
		if (unlikely(len == 0))
			is_short = 1;
		else {
			/* not enough bandwidth left? */
			if (limit < ep->ep.maxpacket && limit < len)
				break;
			len = min_t(unsigned, len, limit);
			if (len == 0)
				break;

			/* send multiple of maxpacket first, then remainder */
			if (len >= ep->ep.maxpacket) {
				is_short = 0;
				if (len % ep->ep.maxpacket)
					rescan = 1;
				len -= len % ep->ep.maxpacket;
			} else {
				is_short = 1;
			}

			len = dummy_perform_transfer(urb, req, len);

			ep->last_io = jiffies;
			/* a negative return is an errno, not a byte count */
			if ((int)len < 0) {
				req->req.status = len;
			} else {
				limit -= len;
				sent += len;
				urb->actual_length += len;
				req->req.actual += len;
			}
		}

		/* short packets terminate, maybe with overflow/underflow.
		 * it's only really an error to write too much.
		 *
		 * partially filling a buffer optionally blocks queue advances
		 * (so completion handlers can clean up the queue) but we don't
		 * need to emulate such data-in-flight.
		 */
		if (is_short) {
			if (host_len == dev_len) {
				req->req.status = 0;
				*status = 0;
			} else if (to_host) {
				req->req.status = 0;
				if (dev_len > host_len)
					*status = -EOVERFLOW;
				else
					*status = 0;
			} else {
				*status = 0;
				if (host_len > dev_len)
					req->req.status = -EOVERFLOW;
				else
					req->req.status = 0;
			}

		/*
		 * many requests terminate without a short packet.
		 * send a zlp if demanded by flags.
		 */
		} else {
			if (req->req.length == req->req.actual) {
				if (req->req.zero && to_host)
					rescan = 1;
				else
					req->req.status = 0;
			}
			if (urb->transfer_buffer_length == urb->actual_length) {
				if (urb->transfer_flags & URB_ZERO_PACKET &&
				    !to_host)
					rescan = 1;
				else
					*status = 0;
			}
		}

		/* device side completion --> continuable */
		if (req->req.status != -EINPROGRESS) {
			list_del_init(&req->queue);

			spin_unlock(&dum->lock);
			usb_gadget_giveback_request(&ep->ep, &req->req);
			spin_lock(&dum->lock);

			/* requests might have been unlinked... */
			rescan = 1;
		}

		/* host side completion --> terminate */
		if (*status != -EINPROGRESS)
			break;

		/* rescan to continue with any other queued i/o */
		if (rescan)
			goto top;
	}
	return sent;
}

/*
 * Per-frame byte budget for a periodic endpoint.  Starts from the endpoint's
 * maxpacket; at high speed this is scaled using usb_endpoint_maxp_mult() and
 * the factor 8, and at super speed isochronous and interrupt endpoints get
 * fixed limits.
 */
static int periodic_bytes(struct dummy *dum, struct dummy_ep *ep)
{
	int	limit = ep->ep.maxpacket;

	if (dum->gadget.speed == USB_SPEED_HIGH) {
		int	tmp;

		/* high bandwidth mode */
		tmp = usb_endpoint_maxp_mult(ep->desc);
		tmp *= 8 /* applies to entire frame */;
		limit += limit * tmp;
	}
	if (dum->gadget.speed == USB_SPEED_SUPER) {
		switch (usb_endpoint_type(ep->desc)) {
		case USB_ENDPOINT_XFER_ISOC:
			/* Sec. 4.4.8.2 USB3.0 Spec */
			limit = 3 * 16 * 1024 * 8;
			break;
		case USB_ENDPOINT_XFER_INT:
			/* Sec. 4.4.7.2 USB3.0 Spec */
			limit = 3 * 1024 * 8;
			break;
		case USB_ENDPOINT_XFER_BULK:
		default:
			break;
		}
	}
	return limit;
}

/* True when the port is connected and enabled but not suspended. */
#define is_active(dum_hcd)	((dum_hcd->port_status & \
		(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE | \
			USB_PORT_STAT_SUSPEND)) \
		== (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE))

/*
 * find_endpoint() - look up the gadget endpoint with bEndpointAddress
 * @address.
 *
 * Returns NULL if the relevant root hub port (ss_hcd at super speed, hs_hcd
 * otherwise) is not active or if dum->ints_enabled is clear.  Endpoint
 * number 0 in either direction maps to ep[0]; other addresses are matched
 * against the descriptors of the configured endpoints.
 */
static struct dummy_ep *find_endpoint(struct dummy *dum, u8 address)
{
	int		i;

	if (!is_active((dum->gadget.speed == USB_SPEED_SUPER ?
			dum->ss_hcd : dum->hs_hcd)))
		return NULL;
	if (!dum->ints_enabled)
		return NULL;
	if ((address & ~USB_DIR_IN) == 0)
		return &dum->ep[0];
	for (i = 1; i < DUMMY_ENDPOINTS; i++) {
		struct dummy_ep	*ep = &dum->ep[i];

		if (!ep->desc)
			continue;
		if (ep->desc->bEndpointAddress == address)
			return ep;
	}
	return NULL;
}

#undef is_active

/* bRequestType values for standard requests, by recipient and direction. */
#define Dev_Request	(USB_TYPE_STANDARD | USB_RECIP_DEVICE)
#define Dev_InRequest	(Dev_Request | USB_DIR_IN)
#define Intf_Request	(USB_TYPE_STANDARD | USB_RECIP_INTERFACE)
#define Intf_InRequest	(Intf_Request | USB_DIR_IN)
#define Ep_Request	(USB_TYPE_STANDARD | USB_RECIP_ENDPOINT)
#define Ep_InRequest	(Ep_Request | USB_DIR_IN)


/**
 * handle_control_request() - handles all control transfers
 * @dum_hcd: pointer to dummy (the_controller)
 * @urb: the urb request to handle
 * @setup: pointer to the setup data for a USB device control
 *	 request
 * @status: pointer to request handling status
 *
 * Return 0 - if the request was handled
 *	  1 - if the request wasn't handled
 *	  error code on error
 */
static int handle_control_request(struct dummy_hcd *dum_hcd, struct urb *urb,
				  struct usb_ctrlrequest *setup,
				  int *status)
{
	struct dummy_ep		*ep2;
	struct dummy		*dum = dum_hcd->dum;
	int			ret_val = 1;
	unsigned	w_index;
	unsigned	w_value;

	w_index = le16_to_cpu(setup->wIndex);
	w_value = le16_to_cpu(setup->wValue);
	switch (setup->bRequest) {
	case USB_REQ_SET_ADDRESS:
		if (setup->bRequestType != Dev_Request)
			break;
		dum->address = w_value;
		*status = 0;
		dev_dbg(udc_dev(dum), "set_address = %d\n",
				w_value);
		ret_val = 0;
		break;
	case USB_REQ_SET_FEATURE:
		if (setup->bRequestType == Dev_Request) {
			ret_val = 0;
			/*
			 * For the U1/U2/LTM features w_value is rewritten to
			 * the devstatus bit number that is set below.
			 */
			switch (w_value) {
			case USB_DEVICE_REMOTE_WAKEUP:
				break;
			case USB_DEVICE_B_HNP_ENABLE:
				dum->gadget.b_hnp_enable = 1;
				break;
			case USB_DEVICE_A_HNP_SUPPORT:
				dum->gadget.a_hnp_support = 1;
				break;
			case USB_DEVICE_A_ALT_HNP_SUPPORT:
				dum->gadget.a_alt_hnp_support = 1;
				break;
			case USB_DEVICE_U1_ENABLE:
				if (dummy_hcd_to_hcd(dum_hcd)->speed ==
				    HCD_USB3)
					w_value = USB_DEV_STAT_U1_ENABLED;
				else
					ret_val = -EOPNOTSUPP;
				break;
			case USB_DEVICE_U2_ENABLE:
				if (dummy_hcd_to_hcd(dum_hcd)->speed ==
				    HCD_USB3)
					w_value = USB_DEV_STAT_U2_ENABLED;
				else
					ret_val = -EOPNOTSUPP;
				break;
			case USB_DEVICE_LTM_ENABLE:
				if (dummy_hcd_to_hcd(dum_hcd)->speed ==
				    HCD_USB3)
					w_value = USB_DEV_STAT_LTM_ENABLED;
				else
					ret_val = -EOPNOTSUPP;
				break;
			default:
				ret_val = -EOPNOTSUPP;
			}
			if (ret_val == 0) {
				dum->devstatus |= (1 << w_value);
				*status = 0;
			}
		} else if (setup->bRequestType == Ep_Request) {
			/* endpoint halt */
			ep2 = find_endpoint(dum, w_index);
			if (!ep2 || ep2->ep.name == ep0name) {
				ret_val = -EOPNOTSUPP;
				break;
			}
			ep2->halted = 1;
			ret_val = 0;
			*status = 0;
		}
		break;
	case USB_REQ_CLEAR_FEATURE:
		if (setup->bRequestType == Dev_Request) {
			ret_val = 0;
			switch (w_value) {
			case USB_DEVICE_REMOTE_WAKEUP:
				w_value = USB_DEVICE_REMOTE_WAKEUP;
				break;
			case USB_DEVICE_U1_ENABLE:
				if (dummy_hcd_to_hcd(dum_hcd)->speed ==
				    HCD_USB3)
					w_value = USB_DEV_STAT_U1_ENABLED;
				else
					ret_val = -EOPNOTSUPP;
				break;
			case USB_DEVICE_U2_ENABLE:
				if (dummy_hcd_to_hcd(dum_hcd)->speed ==
				    HCD_USB3)
					w_value = USB_DEV_STAT_U2_ENABLED;
				else
					ret_val = -EOPNOTSUPP;
				break;
			case USB_DEVICE_LTM_ENABLE:
				if (dummy_hcd_to_hcd(dum_hcd)->speed ==
				    HCD_USB3)
					w_value = USB_DEV_STAT_LTM_ENABLED;
				else
					ret_val = -EOPNOTSUPP;
				break;
			default:
				ret_val = -EOPNOTSUPP;
				break;
			}
			if (ret_val == 0) {
				dum->devstatus &= ~(1 << w_value);
				*status = 0;
			}
		} else if (setup->bRequestType == Ep_Request) {
			/* endpoint halt */
			ep2 = find_endpoint(dum, w_index);
			if (!ep2) {
				ret_val = -EOPNOTSUPP;
				break;
			}
			/* a wedged endpoint stays halted */
			if (!ep2->wedged)
				ep2->halted = 0;
			ret_val = 0;
			*status = 0;
		}
		break;
	case USB_REQ_GET_STATUS:
		if (setup->bRequestType == Dev_InRequest
				|| setup->bRequestType == Intf_InRequest
				|| setup->bRequestType == Ep_InRequest) {
			char *buf;
			/*
			 * device: remote wakeup, selfpowered
			 * interface: nothing
			 * endpoint: halt
			 */
			buf = (char *)urb->transfer_buffer;
			if (urb->transfer_buffer_length > 0) {
				if (setup->bRequestType == Ep_InRequest) {
					ep2 = find_endpoint(dum, w_index);
					if (!ep2) {
						ret_val = -EOPNOTSUPP;
						break;
					}
					buf[0] = ep2->halted;
				} else if (setup->bRequestType ==
					   Dev_InRequest) {
					buf[0] = (u8)dum->devstatus;
				} else
					buf[0] = 0;
			}
			if (urb->transfer_buffer_length > 1)
				buf[1] = 0;
			urb->actual_length = min_t(u32, 2,
				urb->transfer_buffer_length);
			ret_val = 0;
			*status = 0;
		}
		break;
	}
	return ret_val;
}

/*
 * Drive both sides of the transfers; looks like irq handlers to both
 * drivers except that the callbacks are invoked from soft interrupt
 * context.
 */
/*
 * Each run computes a byte budget from the gadget speed, then walks
 * dum_hcd->urbp_list under dum->lock.  Finished or unlinked URBs are given
 * back with the lock dropped, after which the walk restarts from the head of
 * the list.  The timer re-arms itself while URBs remain and the root hub is
 * running; when the list drains, the reference on dum_hcd->udev is dropped.
 */
static void dummy_timer(struct timer_list *t)
{
	struct dummy_hcd	*dum_hcd = from_timer(dum_hcd, t, timer);
	struct dummy		*dum = dum_hcd->dum;
	struct urbp		*urbp, *tmp;
	unsigned long		flags;
	int			limit, total;
	int			i;

	/* simplistic model for one frame's bandwidth */
	/* FIXME: account for transaction and packet overhead */
	switch (dum->gadget.speed) {
	case USB_SPEED_LOW:
		total = 8/*bytes*/ * 12/*packets*/;
		break;
	case USB_SPEED_FULL:
		total = 64/*bytes*/ * 19/*packets*/;
		break;
	case USB_SPEED_HIGH:
		total = 512/*bytes*/ * 13/*packets*/ * 8/*uframes*/;
		break;
	case USB_SPEED_SUPER:
		/* Bus speed is 500000 bytes/ms, so use a little less */
		total = 490000;
		break;
	default:	/* Can't happen */
		dev_err(dummy_dev(dum_hcd), "bogus device speed\n");
		total = 0;
		break;
	}

	/* FIXME if HZ != 1000 this will probably misbehave ... */

	/* look at each urb queued by the host side driver */
	spin_lock_irqsave(&dum->lock, flags);

	if (!dum_hcd->udev) {
		dev_err(dummy_dev(dum_hcd),
				"timer fired with no URBs pending?\n");
		spin_unlock_irqrestore(&dum->lock, flags);
		return;
	}
	dum_hcd->next_frame_urbp = NULL;

	/* each endpoint is serviced at most once per timer run */
	for (i = 0; i < DUMMY_ENDPOINTS; i++) {
		if (!ep_info[i].name)
			break;
		dum->ep[i].already_seen = 0;
	}

restart:
	list_for_each_entry_safe(urbp, tmp, &dum_hcd->urbp_list, urbp_list) {
		struct urb		*urb;
		struct dummy_request	*req;
		u8			address;
		struct dummy_ep		*ep = NULL;
		int			status = -EINPROGRESS;

		/* stop when we reach URBs queued after the timer interrupt */
		if (urbp == dum_hcd->next_frame_urbp)
			break;

		urb = urbp->urb;
		if (urb->unlinked)
			goto return_urb;
		else if (dum_hcd->rh_state != DUMMY_RH_RUNNING)
			continue;

		/* Used up this frame's bandwidth? */
		if (total <= 0)
			continue;

		/* find the gadget's ep for this request (if configured) */
		address = usb_pipeendpoint (urb->pipe);
		if (usb_urb_dir_in(urb))
			address |= USB_DIR_IN;
		ep = find_endpoint(dum, address);
		if (!ep) {
			/* set_configuration() disagreement */
			dev_dbg(dummy_dev(dum_hcd),
				"no ep configured for urb %p\n",
				urb);
			status = -EPROTO;
			goto return_urb;
		}

		if (ep->already_seen)
			continue;
		ep->already_seen = 1;
		if (ep == &dum->ep[0] && urb->error_count) {
			ep->setup_stage = 1;	/* a new urb */
			urb->error_count = 0;
		}
		if (ep->halted && !ep->setup_stage) {
			/* NOTE: must not be iso! */
			dev_dbg(dummy_dev(dum_hcd), "ep %s halted, urb %p\n",
					ep->ep.name, urb);
			status = -EPIPE;
			goto return_urb;
		}
		/* FIXME make sure both ends agree on maxpacket */

		/* handle control requests */
		if (ep == &dum->ep[0] && ep->setup_stage) {
			struct usb_ctrlrequest		setup;
			int				value;

			setup = *(struct usb_ctrlrequest *) urb->setup_packet;
			/* paranoia, in case of stale queued data */
			list_for_each_entry(req, &ep->queue, queue) {
				list_del_init(&req->queue);
				req->req.status = -EOVERFLOW;
				dev_dbg(udc_dev(dum), "stale req = %p\n",
						req);

				spin_unlock(&dum->lock);
				usb_gadget_giveback_request(&ep->ep, &req->req);
				spin_lock(&dum->lock);
				ep->already_seen = 0;
				goto restart;
			}

			/* gadget driver never sees set_address or operations
			 * on standard feature flags.  some hardware doesn't
			 * even expose them.
			 */
			ep->last_io = jiffies;
			ep->setup_stage = 0;
			ep->halted = 0;

			value = handle_control_request(dum_hcd, urb, &setup,
						       &status);

			/* gadget driver handles all other requests.  block
			 * until setup() returns; no reentrancy issues etc.
			 */
			if (value > 0) {
				/* callback_usage is what dummy_pullup() waits on */
				++dum->callback_usage;
				spin_unlock(&dum->lock);
				value = dum->driver->setup(&dum->gadget,
						&setup);
				spin_lock(&dum->lock);
				--dum->callback_usage;

				if (value >= 0) {
					/* no delays (max 64KB data stage) */
					limit = 64*1024;
					goto treat_control_like_bulk;
				}
				/* error, see below */
			}

			if (value < 0) {
				if (value != -EOPNOTSUPP)
					dev_dbg(udc_dev(dum),
						"setup --> %d\n",
						value);
				status = -EPIPE;
				urb->actual_length = 0;
			}

			goto return_urb;
		}

		/* non-control requests */
		limit = total;
		switch (usb_pipetype(urb->pipe)) {
		case PIPE_ISOCHRONOUS:
			/*
			 * We don't support isochronous.  But if we did,
			 * here are some of the issues we'd have to face:
			 *
			 * Is it urb->interval since the last xfer?
			 * Use urb->iso_frame_desc[i].
			 * Complete whether or not ep has requests queued.
			 * Report random errors, to debug drivers.
			 */
			limit = max(limit, periodic_bytes(dum, ep));
			status = -EINVAL;	/* fail all xfers */
			break;

		case PIPE_INTERRUPT:
			/* FIXME is it urb->interval since the last xfer?
			 * this almost certainly polls too fast.
			 */
			limit = max(limit, periodic_bytes(dum, ep));
			fallthrough;

		default:
treat_control_like_bulk:
			ep->last_io = jiffies;
			total -= transfer(dum_hcd, urb, ep, limit, &status);
			break;
		}

		/* incomplete transfer? */
		if (status == -EINPROGRESS)
			continue;

return_urb:
		list_del(&urbp->urbp_list);
		kfree(urbp);
		if (ep)
			ep->already_seen = ep->setup_stage = 0;

		usb_hcd_unlink_urb_from_ep(dummy_hcd_to_hcd(dum_hcd), urb);
		spin_unlock(&dum->lock);
		usb_hcd_giveback_urb(dummy_hcd_to_hcd(dum_hcd), urb, status);
		spin_lock(&dum->lock);

		/* the list may have changed while the lock was dropped */
		goto restart;
	}

	if (list_empty(&dum_hcd->urbp_list)) {
		usb_put_dev(dum_hcd->udev);
		dum_hcd->udev = NULL;
	} else if (dum_hcd->rh_state == DUMMY_RH_RUNNING) {
		/* want a 1 msec delay here */
		mod_timer(&dum_hcd->timer, jiffies + msecs_to_jiffies(1));
	}

	spin_unlock_irqrestore(&dum->lock, flags);
}

/*-------------------------------------------------------------------------*/

/* All port change bits, positioned in the upper half of port_status. */
#define PORT_C_MASK \
	((USB_PORT_STAT_C_CONNECTION \
	| USB_PORT_STAT_C_ENABLE \
	| USB_PORT_STAT_C_SUSPEND \
	| USB_PORT_STAT_C_OVERCURRENT \
	| USB_PORT_STAT_C_RESET) << 16)

/*
 * dummy_hub_status() - report whether the root hub port has pending changes.
 *
 * Completes an expired resume first.  If any change bit is set, stores
 * (1 << 1) in *@buf, resumes the root hub when it is suspended, and returns
 * 1; otherwise (or when the hardware is not accessible) returns 0.
 */
static int dummy_hub_status(struct usb_hcd *hcd, char *buf)
{
	struct dummy_hcd	*dum_hcd;
	unsigned long		flags;
	int			retval = 0;

	dum_hcd = hcd_to_dummy_hcd(hcd);

	spin_lock_irqsave(&dum_hcd->dum->lock, flags);
	if (!HCD_HW_ACCESSIBLE(hcd))
		goto done;

	if (dum_hcd->resuming && time_after_eq(jiffies, dum_hcd->re_timeout)) {
		dum_hcd->port_status |= (USB_PORT_STAT_C_SUSPEND << 16);
		dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND;
		set_link_state(dum_hcd);
	}

	if ((dum_hcd->port_status & PORT_C_MASK) != 0) {
		*buf = (1 << 1);
		dev_dbg(dummy_dev(dum_hcd), "port status 0x%08x has changes\n",
				dum_hcd->port_status);
		retval = 1;
		if (dum_hcd->rh_state == DUMMY_RH_SUSPENDED)
			usb_hcd_resume_root_hub(hcd);
	}
done:
	spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
	return retval;
}

/* usb 3.0 root hub device descriptor */
static struct {
	struct usb_bos_descriptor bos;
	struct usb_ss_cap_descriptor ss_cap;
} __packed usb3_bos_desc = {

	.bos = {
		.bLength		= USB_DT_BOS_SIZE,
		.bDescriptorType	= USB_DT_BOS,
		.wTotalLength		= cpu_to_le16(sizeof(usb3_bos_desc)),
		.bNumDeviceCaps		= 1,
	},
	.ss_cap = {
		.bLength		= USB_DT_USB_SS_CAP_SIZE,
		.bDescriptorType	= USB_DT_DEVICE_CAPABILITY,
		.bDevCapabilityType	= USB_SS_CAP_TYPE,
		.wSpeedSupported	= cpu_to_le16(USB_5GBPS_OPERATION),
		.bFunctionalitySupport	= ilog2(USB_5GBPS_OPERATION),
	},
};

/* Fill @desc with the one-port SuperSpeed root hub descriptor. */
static inline void
ss_hub_descriptor(struct usb_hub_descriptor *desc)
{
	memset(desc, 0, sizeof *desc);
	desc->bDescriptorType = USB_DT_SS_HUB;
	desc->bDescLength = 12;
	desc->wHubCharacteristics = cpu_to_le16(
			HUB_CHAR_INDV_PORT_LPSM |
			HUB_CHAR_COMMON_OCPM);
	desc->bNbrPorts = 1;
	desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/
	desc->u.ss.DeviceRemovable = 0;
}

/* Fill @desc with the one-port USB 2.0 root hub descriptor. */
static inline void hub_descriptor(struct usb_hub_descriptor *desc)
{
	memset(desc, 0, sizeof *desc);
	desc->bDescriptorType = USB_DT_HUB;
	desc->bDescLength = 9;
	desc->wHubCharacteristics = cpu_to_le16(
			HUB_CHAR_INDV_PORT_LPSM |
			HUB_CHAR_COMMON_OCPM);
	desc->bNbrPorts = 1;
	desc->u.hs.DeviceRemovable[0] = 0;
	desc->u.hs.DeviceRemovable[1] = 0xff;	/* PortPwrCtrlMask */
}

/*
 * dummy_hub_control() - handle a control request addressed to the root hub.
 *
 * Dispatches on @typeReq with dum->lock held.  Returns 0 on success, the
 * descriptor size for a BOS GET_DESCRIPTOR on the USB 3 root hub, -EPIPE for
 * requests that are unsupported or invalid for this hub's speed, and
 * -ETIMEDOUT when the hardware is not accessible.  If any port change bit is
 * set afterwards, the root-hub status is polled.
 */
static int dummy_hub_control(
	struct usb_hcd	*hcd,
	u16		typeReq,
	u16		wValue,
	u16		wIndex,
	char		*buf,
	u16		wLength
) {
	struct dummy_hcd *dum_hcd;
	int		retval = 0;
	unsigned long	flags;

	if (!HCD_HW_ACCESSIBLE(hcd))
		return -ETIMEDOUT;

	dum_hcd = hcd_to_dummy_hcd(hcd);

	spin_lock_irqsave(&dum_hcd->dum->lock, flags);
	switch (typeReq) {
	case ClearHubFeature:
		break;
	case ClearPortFeature:
		switch (wValue) {
		case USB_PORT_FEAT_SUSPEND:
			if (hcd->speed == HCD_USB3) {
				dev_dbg(dummy_dev(dum_hcd),
					 "USB_PORT_FEAT_SUSPEND req not "
					 "supported for USB 3.0 roothub\n");
				goto error;
			}
			if (dum_hcd->port_status & USB_PORT_STAT_SUSPEND) {
				/* 20msec resume signaling */
				dum_hcd->resuming = 1;
				dum_hcd->re_timeout = jiffies +
						msecs_to_jiffies(20);
			}
			break;
		case USB_PORT_FEAT_POWER:
			dev_dbg(dummy_dev(dum_hcd), "power-off\n");
			if (hcd->speed == HCD_USB3)
				dum_hcd->port_status &= ~USB_SS_PORT_STAT_POWER;
			else
				dum_hcd->port_status &= ~USB_PORT_STAT_POWER;
			set_link_state(dum_hcd);
			break;
		case USB_PORT_FEAT_ENABLE:
		case USB_PORT_FEAT_C_ENABLE:
		case USB_PORT_FEAT_C_SUSPEND:
			/* Not allowed for USB-3 */
			if (hcd->speed == HCD_USB3)
				goto error;
			fallthrough;
		case USB_PORT_FEAT_C_CONNECTION:
		case USB_PORT_FEAT_C_RESET:
			dum_hcd->port_status &= ~(1 << wValue);
			set_link_state(dum_hcd);
			break;
		default:
		/* Disallow INDICATOR and C_OVER_CURRENT */
			goto error;
		}
		break;
	case GetHubDescriptor:
		if (hcd->speed == HCD_USB3 &&
				(wLength < USB_DT_SS_HUB_SIZE ||
				 wValue != (USB_DT_SS_HUB << 8))) {
			dev_dbg(dummy_dev(dum_hcd),
				"Wrong hub descriptor type for "
				"USB 3.0 roothub.\n");
			goto error;
		}
		if (hcd->speed == HCD_USB3)
			ss_hub_descriptor((struct usb_hub_descriptor *) buf);
		else
			hub_descriptor((struct usb_hub_descriptor *) buf);
		break;

	case DeviceRequest | USB_REQ_GET_DESCRIPTOR:
		if (hcd->speed != HCD_USB3)
			goto error;

		if ((wValue >> 8) != USB_DT_BOS)
			goto error;

		memcpy(buf, &usb3_bos_desc, sizeof(usb3_bos_desc));
		retval = sizeof(usb3_bos_desc);
		break;

	case GetHubStatus:
		*(__le32 *) buf = cpu_to_le32(0);
		break;
	case GetPortStatus:
		if (wIndex != 1)
			retval = -EPIPE;

		/* whoever resets or resumes must GetPortStatus to
		 * complete it!!
		 */
		if (dum_hcd->resuming &&
				time_after_eq(jiffies, dum_hcd->re_timeout)) {
			dum_hcd->port_status |= (USB_PORT_STAT_C_SUSPEND << 16);
			dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND;
		}
		if ((dum_hcd->port_status & USB_PORT_STAT_RESET) != 0 &&
				time_after_eq(jiffies, dum_hcd->re_timeout)) {
			dum_hcd->port_status |= (USB_PORT_STAT_C_RESET << 16);
			dum_hcd->port_status &= ~USB_PORT_STAT_RESET;
			if (dum_hcd->dum->pullup) {
				dum_hcd->port_status |= USB_PORT_STAT_ENABLE;

				if (hcd->speed < HCD_USB3) {
					switch (dum_hcd->dum->gadget.speed) {
					case USB_SPEED_HIGH:
						dum_hcd->port_status |=
						      USB_PORT_STAT_HIGH_SPEED;
						break;
					case USB_SPEED_LOW:
						dum_hcd->dum->gadget.ep0->
							maxpacket = 8;
						dum_hcd->port_status |=
							USB_PORT_STAT_LOW_SPEED;
						break;
					default:
						break;
					}
				}
			}
		}
		set_link_state(dum_hcd);
		/* low half: status bits, high half: change bits */
		((__le16 *) buf)[0] = cpu_to_le16(dum_hcd->port_status);
		((__le16 *) buf)[1] = cpu_to_le16(dum_hcd->port_status >> 16);
		break;
	case SetHubFeature:
		retval = -EPIPE;
		break;
	case SetPortFeature:
		switch (wValue) {
		case USB_PORT_FEAT_LINK_STATE:
			if (hcd->speed != HCD_USB3) {
				dev_dbg(dummy_dev(dum_hcd),
					 "USB_PORT_FEAT_LINK_STATE req not "
					 "supported for USB 2.0 roothub\n");
				goto error;
			}
			/*
			 * Since this is dummy we don't have an actual link so
			 * there is nothing to do for the SET_LINK_STATE cmd
			 */
			break;
		case USB_PORT_FEAT_U1_TIMEOUT:
		case USB_PORT_FEAT_U2_TIMEOUT:
			/* TODO: add suspend/resume support! */
			if (hcd->speed != HCD_USB3) {
				dev_dbg(dummy_dev(dum_hcd),
					 "USB_PORT_FEAT_U1/2_TIMEOUT req not "
					 "supported for USB 2.0 roothub\n");
				goto error;
			}
			break;
		case USB_PORT_FEAT_SUSPEND:
			/* Applicable only for USB2.0 hub */
			if (hcd->speed == HCD_USB3) {
				dev_dbg(dummy_dev(dum_hcd),
					 "USB_PORT_FEAT_SUSPEND req not "
					 "supported for USB 3.0 roothub\n");
				goto error;
			}
			if (dum_hcd->active) {
				dum_hcd->port_status |= USB_PORT_STAT_SUSPEND;

				/* HNP would happen here; for now we
				 * assume b_bus_req is always true.
				 */
				set_link_state(dum_hcd);
				if (((1 << USB_DEVICE_B_HNP_ENABLE)
						& dum_hcd->dum->devstatus) != 0)
					dev_dbg(dummy_dev(dum_hcd),
							"no HNP yet!\n");
			}
			break;
		case USB_PORT_FEAT_POWER:
			if (hcd->speed == HCD_USB3)
				dum_hcd->port_status |= USB_SS_PORT_STAT_POWER;
			else
				dum_hcd->port_status |= USB_PORT_STAT_POWER;
			set_link_state(dum_hcd);
			break;
		case USB_PORT_FEAT_BH_PORT_RESET:
			/* Applicable only for USB3.0 hub */
			if (hcd->speed != HCD_USB3) {
				dev_dbg(dummy_dev(dum_hcd),
					 "USB_PORT_FEAT_BH_PORT_RESET req not "
					 "supported for USB 2.0 roothub\n");
				goto error;
			}
			fallthrough;
		case USB_PORT_FEAT_RESET:
			if (!(dum_hcd->port_status & USB_PORT_STAT_CONNECTION))
				break;
			/* if it's already enabled, disable */
			if (hcd->speed == HCD_USB3) {
				dum_hcd->port_status =
					(USB_SS_PORT_STAT_POWER |
					 USB_PORT_STAT_CONNECTION |
					 USB_PORT_STAT_RESET);
			} else {
				dum_hcd->port_status &= ~(USB_PORT_STAT_ENABLE
					| USB_PORT_STAT_LOW_SPEED
					| USB_PORT_STAT_HIGH_SPEED);
				dum_hcd->port_status |= USB_PORT_STAT_RESET;
			}
			/*
			 * We want to reset device status. All but the
			 * Self powered feature
			 */
			dum_hcd->dum->devstatus &=
				(1 << USB_DEVICE_SELF_POWERED);
			/*
			 * FIXME USB3.0: what is the correct reset signaling
			 * interval? Is it still 50msec as for HS?
			 */
			dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50);
			set_link_state(dum_hcd);
			break;
		case USB_PORT_FEAT_C_CONNECTION:
		case USB_PORT_FEAT_C_RESET:
		case USB_PORT_FEAT_C_ENABLE:
		case USB_PORT_FEAT_C_SUSPEND:
			/* Not allowed for USB-3, and ignored for USB-2 */
			if (hcd->speed == HCD_USB3)
				goto error;
			break;
		default:
			/* Disallow TEST, INDICATOR, and C_OVER_CURRENT */
			goto error;
		}
		break;
	case GetPortErrorCount:
		if (hcd->speed != HCD_USB3) {
			dev_dbg(dummy_dev(dum_hcd),
				 "GetPortErrorCount req not "
				 "supported for USB 2.0 roothub\n");
			goto error;
		}
		/* We'll always return 0 since this is a dummy hub */
		*(__le32 *) buf = cpu_to_le32(0);
		break;
	case SetHubDepth:
		if (hcd->speed != HCD_USB3) {
			dev_dbg(dummy_dev(dum_hcd),
				 "SetHubDepth req not supported for "
				 "USB 2.0 roothub\n");
			goto error;
		}
		break;
	default:
		dev_dbg(dummy_dev(dum_hcd),
			"hub control req%04x v%04x i%04x l%d\n",
			typeReq, wValue, wIndex, wLength);
error:
		/* "protocol stall" on error */
		retval = -EPIPE;
	}
	spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);

	if ((dum_hcd->port_status & PORT_C_MASK) != 0)
		usb_hcd_poll_rh_status(hcd);
	return retval;
}

/*
 * Root hub suspend: mark the emulated root hub suspended, recompute the link
 * state and set hcd->state to HC_STATE_SUSPENDED.  Always returns 0.
 */
static int dummy_bus_suspend(struct usb_hcd *hcd)
{
	struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd);

	dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);

	spin_lock_irq(&dum_hcd->dum->lock);
	dum_hcd->rh_state = DUMMY_RH_SUSPENDED;
	set_link_state(dum_hcd);
	hcd->state = HC_STATE_SUSPENDED;
	spin_unlock_irq(&dum_hcd->dum->lock);
	return 0;
}

/*
 * Root hub resume: returns -ESHUTDOWN if the hardware is not accessible.
 * Otherwise marks the root hub running, recomputes the link state, fires the
 * timer immediately when URBs are queued, and sets hcd->state to
 * HC_STATE_RUNNING; returns 0.
 */
static int dummy_bus_resume(struct usb_hcd *hcd)
{
	struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd);
	int rc = 0;

	dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);

	spin_lock_irq(&dum_hcd->dum->lock);
	if (!HCD_HW_ACCESSIBLE(hcd)) {
		rc = -ESHUTDOWN;
	} else {
		dum_hcd->rh_state = DUMMY_RH_RUNNING;
		set_link_state(dum_hcd);
		if (!list_empty(&dum_hcd->urbp_list))
			mod_timer(&dum_hcd->timer, jiffies);
		hcd->state = HC_STATE_RUNNING;
	}
	spin_unlock_irq(&dum_hcd->dum->lock);
	return rc;
}
/*-------------------------------------------------------------------------*/ static inline ssize_t show_urb(char *buf, size_t size, struct urb *urb) { int ep = usb_pipeendpoint(urb->pipe); return scnprintf(buf, size, "urb/%p %s ep%d%s%s len %d/%d\n", urb, ({ char *s; switch (urb->dev->speed) { case USB_SPEED_LOW: s = "ls"; break; case USB_SPEED_FULL: s = "fs"; break; case USB_SPEED_HIGH: s = "hs"; break; case USB_SPEED_SUPER: s = "ss"; break; default: s = "?"; break; } s; }), ep, ep ? (usb_urb_dir_in(urb) ? "in" : "out") : "", ({ char *s; \ switch (usb_pipetype(urb->pipe)) { \ case PIPE_CONTROL: \ s = ""; \ break; \ case PIPE_BULK: \ s = "-bulk"; \ break; \ case PIPE_INTERRUPT: \ s = "-int"; \ break; \ default: \ s = "-iso"; \ break; \ } s; }), urb->actual_length, urb->transfer_buffer_length); } static ssize_t urbs_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); struct urbp *urbp; size_t size = 0; unsigned long flags; spin_lock_irqsave(&dum_hcd->dum->lock, flags); list_for_each_entry(urbp, &dum_hcd->urbp_list, urbp_list) { size_t temp; temp = show_urb(buf, PAGE_SIZE - size, urbp->urb); buf += temp; size += temp; } spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return size; } static DEVICE_ATTR_RO(urbs); static int dummy_start_ss(struct dummy_hcd *dum_hcd) { timer_setup(&dum_hcd->timer, dummy_timer, 0); dum_hcd->rh_state = DUMMY_RH_RUNNING; dum_hcd->stream_en_ep = 0; INIT_LIST_HEAD(&dum_hcd->urbp_list); dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET_3; dummy_hcd_to_hcd(dum_hcd)->state = HC_STATE_RUNNING; dummy_hcd_to_hcd(dum_hcd)->uses_new_polling = 1; #ifdef CONFIG_USB_OTG dummy_hcd_to_hcd(dum_hcd)->self.otg_port = 1; #endif return 0; /* FIXME 'urbs' should be a per-device thing, maybe in usbcore */ return device_create_file(dummy_dev(dum_hcd), &dev_attr_urbs); } static int dummy_start(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = 
hcd_to_dummy_hcd(hcd); /* * HOST side init ... we emulate a root hub that'll only ever * talk to one device (the gadget side). Also appears in sysfs, * just like more familiar pci-based HCDs. */ if (!usb_hcd_is_primary_hcd(hcd)) return dummy_start_ss(dum_hcd); spin_lock_init(&dum_hcd->dum->lock); timer_setup(&dum_hcd->timer, dummy_timer, 0); dum_hcd->rh_state = DUMMY_RH_RUNNING; INIT_LIST_HEAD(&dum_hcd->urbp_list); hcd->power_budget = POWER_BUDGET; hcd->state = HC_STATE_RUNNING; hcd->uses_new_polling = 1; #ifdef CONFIG_USB_OTG hcd->self.otg_port = 1; #endif /* FIXME 'urbs' should be a per-device thing, maybe in usbcore */ return device_create_file(dummy_dev(dum_hcd), &dev_attr_urbs); } static void dummy_stop(struct usb_hcd *hcd) { device_remove_file(dummy_dev(hcd_to_dummy_hcd(hcd)), &dev_attr_urbs); dev_info(dummy_dev(hcd_to_dummy_hcd(hcd)), "stopped\n"); } /*-------------------------------------------------------------------------*/ static int dummy_h_get_frame(struct usb_hcd *hcd) { return dummy_g_get_frame(NULL); } static int dummy_setup(struct usb_hcd *hcd) { struct dummy *dum; dum = *((void **)dev_get_platdata(hcd->self.controller)); hcd->self.sg_tablesize = ~0; if (usb_hcd_is_primary_hcd(hcd)) { dum->hs_hcd = hcd_to_dummy_hcd(hcd); dum->hs_hcd->dum = dum; /* * Mark the first roothub as being USB 2.0. 
* The USB 3.0 roothub will be registered later by * dummy_hcd_probe() */ hcd->speed = HCD_USB2; hcd->self.root_hub->speed = USB_SPEED_HIGH; } else { dum->ss_hcd = hcd_to_dummy_hcd(hcd); dum->ss_hcd->dum = dum; hcd->speed = HCD_USB3; hcd->self.root_hub->speed = USB_SPEED_SUPER; } return 0; } /* Change a group of bulk endpoints to support multiple stream IDs */ static int dummy_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); unsigned long flags; int max_stream; int ret_streams = num_streams; unsigned int index; unsigned int i; if (!num_eps) return -EINVAL; spin_lock_irqsave(&dum_hcd->dum->lock, flags); for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); if ((1 << index) & dum_hcd->stream_en_ep) { ret_streams = -EINVAL; goto out; } max_stream = usb_ss_max_streams(&eps[i]->ss_ep_comp); if (!max_stream) { ret_streams = -EINVAL; goto out; } if (max_stream < ret_streams) { dev_dbg(dummy_dev(dum_hcd), "Ep 0x%x only supports %u " "stream IDs.\n", eps[i]->desc.bEndpointAddress, max_stream); ret_streams = max_stream; } } for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); dum_hcd->stream_en_ep |= 1 << index; set_max_streams_for_pipe(dum_hcd, usb_endpoint_num(&eps[i]->desc), ret_streams); } out: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return ret_streams; } /* Reverts a group of bulk endpoints back to not using stream IDs. 
 */
/*
 * Returns -EINVAL, leaving stream_en_ep untouched, if any endpoint in @eps
 * does not currently have its stream-enabled bit set.  Otherwise clears the
 * bit for every endpoint, sets its max-streams value to 0 and returns 0.
 * The whole operation runs under dum->lock.
 */
static int dummy_free_streams(struct usb_hcd *hcd, struct usb_device *udev,
	struct usb_host_endpoint **eps, unsigned int num_eps,
	gfp_t mem_flags)
{
	struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd);
	unsigned long flags;
	int ret;
	unsigned int index;
	unsigned int i;

	spin_lock_irqsave(&dum_hcd->dum->lock, flags);
	/* first pass: validate only */
	for (i = 0; i < num_eps; i++) {
		index = dummy_get_ep_idx(&eps[i]->desc);
		if (!((1 << index) & dum_hcd->stream_en_ep)) {
			ret = -EINVAL;
			goto out;
		}
	}

	/* second pass: commit */
	for (i = 0; i < num_eps; i++) {
		index = dummy_get_ep_idx(&eps[i]->desc);
		dum_hcd->stream_en_ep &= ~(1 << index);
		set_max_streams_for_pipe(dum_hcd,
				usb_endpoint_num(&eps[i]->desc), 0);
	}
	ret = 0;
out:
	spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
	return ret;
}

/*
 * Host controller operations.  Not const: dummy_hcd_probe() writes .flags
 * according to the module parameters.
 */
static struct hc_driver dummy_hcd = {
	.description =		(char *) driver_name,
	.product_desc =		"Dummy host controller",
	.hcd_priv_size =	sizeof(struct dummy_hcd),

	.reset =		dummy_setup,
	.start =		dummy_start,
	.stop =			dummy_stop,

	.urb_enqueue =		dummy_urb_enqueue,
	.urb_dequeue =		dummy_urb_dequeue,

	.get_frame_number =	dummy_h_get_frame,

	.hub_status_data =	dummy_hub_status,
	.hub_control =		dummy_hub_control,
	.bus_suspend =		dummy_bus_suspend,
	.bus_resume =		dummy_bus_resume,

	.alloc_streams =	dummy_alloc_streams,
	.free_streams =		dummy_free_streams,
};

/*
 * Platform probe: create and add the primary hcd and, when
 * mod_data.is_super_speed is set, a second hcd sharing it.
 *
 * Returns 0 on success.  On failure everything added so far is removed,
 * dum->hs_hcd and dum->ss_hcd are reset to NULL and the error is returned.
 */
static int dummy_hcd_probe(struct platform_device *pdev)
{
	struct dummy		*dum;
	struct usb_hcd		*hs_hcd;
	struct usb_hcd		*ss_hcd;
	int			retval;

	dev_info(&pdev->dev, "%s, driver " DRIVER_VERSION "\n", driver_desc);
	dum = *((void **)dev_get_platdata(&pdev->dev));

	/* pick the advertised controller speed from the module parameters */
	if (mod_data.is_super_speed)
		dummy_hcd.flags = HCD_USB3 | HCD_SHARED;
	else if (mod_data.is_high_speed)
		dummy_hcd.flags = HCD_USB2;
	else
		dummy_hcd.flags = HCD_USB11;
	hs_hcd = usb_create_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev));
	if (!hs_hcd)
		return -ENOMEM;
	hs_hcd->has_tt = 1;

	retval = usb_add_hcd(hs_hcd, 0, 0);
	if (retval)
		goto put_usb2_hcd;

	if (mod_data.is_super_speed) {
		ss_hcd = usb_create_shared_hcd(&dummy_hcd, &pdev->dev,
					dev_name(&pdev->dev), hs_hcd);
		if (!ss_hcd) {
			retval = -ENOMEM;
			goto dealloc_usb2_hcd;
		}

		retval = usb_add_hcd(ss_hcd, 0, 0);
		if (retval)
			goto put_usb3_hcd;
	}
	return 0;

	/* unwind in reverse order of construction; labels fall through */
put_usb3_hcd:
	usb_put_hcd(ss_hcd);
dealloc_usb2_hcd:
	usb_remove_hcd(hs_hcd);
put_usb2_hcd:
	usb_put_hcd(hs_hcd);
	dum->hs_hcd = dum->ss_hcd = NULL;
	return retval;
}

/*
 * Platform remove: tear down the second hcd first (if one was set up), then
 * the primary one, and clear both pointers in struct dummy.
 */
static void dummy_hcd_remove(struct platform_device *pdev)
{
	struct dummy		*dum;

	dum = hcd_to_dummy_hcd(platform_get_drvdata(pdev))->dum;

	if (dum->ss_hcd) {
		usb_remove_hcd(dummy_hcd_to_hcd(dum->ss_hcd));
		usb_put_hcd(dummy_hcd_to_hcd(dum->ss_hcd));
	}

	usb_remove_hcd(dummy_hcd_to_hcd(dum->hs_hcd));
	usb_put_hcd(dummy_hcd_to_hcd(dum->hs_hcd));

	dum->hs_hcd = NULL;
	dum->ss_hcd = NULL;
}

/*
 * Platform suspend: refuse with -EBUSY while the root hub state is still
 * DUMMY_RH_RUNNING; otherwise clear HCD_FLAG_HW_ACCESSIBLE and return 0.
 */
static int dummy_hcd_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct usb_hcd		*hcd;
	struct dummy_hcd	*dum_hcd;
	int			rc = 0;

	dev_dbg(&pdev->dev, "%s\n", __func__);

	hcd = platform_get_drvdata(pdev);
	dum_hcd = hcd_to_dummy_hcd(hcd);
	if (dum_hcd->rh_state == DUMMY_RH_RUNNING) {
		dev_warn(&pdev->dev, "Root hub isn't suspended!\n");
		rc = -EBUSY;
	} else
		clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
	return rc;
}

/*
 * Platform resume: set HCD_FLAG_HW_ACCESSIBLE again and poll the root-hub
 * status.  Always returns 0.
 */
static int dummy_hcd_resume(struct platform_device *pdev)
{
	struct usb_hcd		*hcd;

	dev_dbg(&pdev->dev, "%s\n", __func__);

	hcd = platform_get_drvdata(pdev);
	set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
	usb_hcd_poll_rh_status(hcd);
	return 0;
}

/* Platform driver glue for the host-controller side. */
static struct platform_driver dummy_hcd_driver = {
	.probe		= dummy_hcd_probe,
	.remove_new	= dummy_hcd_remove,
	.suspend	= dummy_hcd_suspend,
	.resume		= dummy_hcd_resume,
	.driver		= {
		.name	= driver_name,
	},
};

/*-------------------------------------------------------------------------*/
/* Upper bound for mod_data.num; sizes the platform-device tables below. */
#define MAX_NUM_UDC	32
static struct platform_device *the_udc_pdev[MAX_NUM_UDC];
static struct platform_device *the_hcd_pdev[MAX_NUM_UDC];

/*
 * Module init.  For each of the mod_data.num emulated controllers:
 * allocate an hcd and a udc platform device, allocate one struct dummy and
 * hand a pointer to it to both devices as platform data, register both
 * platform drivers, then add the hcd devices followed by the udc devices,
 * checking after each group that the probe routines actually succeeded.
 *
 * Returns 0 on success or a negative errno after undoing the completed
 * steps (the error labels at the bottom fall through in reverse order).
 */
static int __init dummy_hcd_init(void)
{
	int	retval = -ENOMEM;
	int	i;
	struct	dummy *dum[MAX_NUM_UDC] = {};

	if (usb_disabled())
		return -ENODEV;

	/* super speed is only accepted together with high speed */
	if (!mod_data.is_high_speed && mod_data.is_super_speed)
		return -EINVAL;

	if (mod_data.num < 1 || mod_data.num > MAX_NUM_UDC) {
		pr_err("Number of emulated UDC must be in range of 1...%d\n",
				MAX_NUM_UDC);
		return -EINVAL;
	}

	for (i = 0; i < mod_data.num; i++) {
		the_hcd_pdev[i] = platform_device_alloc(driver_name, i);
		if (!the_hcd_pdev[i]) {
			/* drop the devices allocated before this one */
			i--;
			while (i >= 0)
				platform_device_put(the_hcd_pdev[i--]);
			return retval;
		}
	}
	for (i = 0; i < mod_data.num; i++) {
		the_udc_pdev[i] = platform_device_alloc(gadget_name, i);
		if (!the_udc_pdev[i]) {
			/* drop the udc devices allocated before this one */
			i--;
			while (i >= 0)
				platform_device_put(the_udc_pdev[i--]);
			goto err_alloc_udc;
		}
	}
	for (i = 0; i < mod_data.num; i++) {
		dum[i] = kzalloc(sizeof(struct dummy), GFP_KERNEL);
		if (!dum[i]) {
			retval = -ENOMEM;
			goto err_add_pdata;
		}
		/* both devices get the same struct dummy pointer as pdata */
		retval = platform_device_add_data(the_hcd_pdev[i], &dum[i],
				sizeof(void *));
		if (retval)
			goto err_add_pdata;
		retval = platform_device_add_data(the_udc_pdev[i], &dum[i],
				sizeof(void *));
		if (retval)
			goto err_add_pdata;
	}

	retval = platform_driver_register(&dummy_hcd_driver);
	if (retval < 0)
		goto err_add_pdata;
	retval = platform_driver_register(&dummy_udc_driver);
	if (retval < 0)
		goto err_register_udc_driver;

	for (i = 0; i < mod_data.num; i++) {
		retval = platform_device_add(the_hcd_pdev[i]);
		if (retval < 0) {
			/* delete only the hcd devices already added */
			i--;
			while (i >= 0)
				platform_device_del(the_hcd_pdev[i--]);
			goto err_add_hcd;
		}
	}
	for (i = 0; i < mod_data.num; i++) {
		if (!dum[i]->hs_hcd ||
				(!dum[i]->ss_hcd && mod_data.is_super_speed)) {
			/*
			 * The hcd was added successfully but its probe
			 * function failed for some reason.
			 */
			retval = -EINVAL;
			goto err_add_udc;
		}
	}

	for (i = 0; i < mod_data.num; i++) {
		retval = platform_device_add(the_udc_pdev[i]);
		if (retval < 0) {
			/* delete only the udc devices already added */
			i--;
			while (i >= 0)
				platform_device_del(the_udc_pdev[i--]);
			goto err_add_udc;
		}
	}

	for (i = 0; i < mod_data.num; i++) {
		if (!platform_get_drvdata(the_udc_pdev[i])) {
			/*
			 * The udc was added successfully but its probe
			 * function failed for some reason.
			 */
			retval = -EINVAL;
			goto err_probe_udc;
		}
	}
	return retval;

err_probe_udc:
	for (i = 0; i < mod_data.num; i++)
		platform_device_del(the_udc_pdev[i]);
err_add_udc:
	for (i = 0; i < mod_data.num; i++)
		platform_device_del(the_hcd_pdev[i]);
err_add_hcd:
	platform_driver_unregister(&dummy_udc_driver);
err_register_udc_driver:
	platform_driver_unregister(&dummy_hcd_driver);
err_add_pdata:
	/* dum[] starts zeroed, so entries never allocated are NULL here */
	for (i = 0; i < mod_data.num; i++)
		kfree(dum[i]);
	for (i = 0; i < mod_data.num; i++)
		platform_device_put(the_udc_pdev[i]);
err_alloc_udc:
	for (i = 0; i < mod_data.num; i++)
		platform_device_put(the_hcd_pdev[i]);
	return retval;
}
module_init(dummy_hcd_init);

/*
 * Module exit: for every emulated controller fetch its struct dummy from
 * the udc device's platform data, unregister both platform devices and free
 * the struct; finally unregister both platform drivers.
 */
static void __exit dummy_hcd_cleanup(void)
{
	int i;

	for (i = 0; i < mod_data.num; i++) {
		struct dummy *dum;

		/* read the pointer before the device holding it goes away */
		dum = *((void **)dev_get_platdata(&the_udc_pdev[i]->dev));

		platform_device_unregister(the_udc_pdev[i]);
		platform_device_unregister(the_hcd_pdev[i]);
		kfree(dum);
	}
	platform_driver_unregister(&dummy_udc_driver);
	platform_driver_unregister(&dummy_hcd_driver);
}
module_exit(dummy_hcd_cleanup);
|
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIME_H #define _LINUX_TIME_H # include <linux/cache.h> # include <linux/math64.h> # include <linux/time64.h> extern struct timezone sys_tz; int get_timespec64(struct timespec64 *ts, const struct __kernel_timespec __user *uts); int put_timespec64(const struct timespec64 *ts, struct __kernel_timespec __user *uts); int get_itimerspec64(struct itimerspec64 *it, const struct __kernel_itimerspec __user *uit); int put_itimerspec64(const struct itimerspec64 *it, struct __kernel_itimerspec __user *uit); extern time64_t mktime64(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); #ifdef CONFIG_POSIX_TIMERS extern void clear_itimer(void); #else static inline void clear_itimer(void) {} #endif extern long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, int flags); /* * Similar to the struct tm in userspace <time.h>, but it needs to be here so * that the kernel source is self contained. 
*/ struct tm { /* * the number of seconds after the minute, normally in the range * 0 to 59, but can be up to 60 to allow for leap seconds */ int tm_sec; /* the number of minutes after the hour, in the range 0 to 59*/ int tm_min; /* the number of hours past midnight, in the range 0 to 23 */ int tm_hour; /* the day of the month, in the range 1 to 31 */ int tm_mday; /* the number of months since January, in the range 0 to 11 */ int tm_mon; /* the number of years since 1900 */ long tm_year; /* the number of days since Sunday, in the range 0 to 6 */ int tm_wday; /* the number of days since January 1, in the range 0 to 365 */ int tm_yday; }; void time64_to_tm(time64_t totalsecs, int offset, struct tm *result); # include <linux/time32.h> static inline bool itimerspec64_valid(const struct itimerspec64 *its) { if (!timespec64_valid(&(its->it_interval)) || !timespec64_valid(&(its->it_value))) return false; return true; } /** * time_after32 - compare two 32-bit relative times * @a: the time which may be after @b * @b: the time which may be before @a * * time_after32(a, b) returns true if the time @a is after time @b. * time_before32(b, a) returns true if the time @b is before time @a. * * Similar to time_after(), compare two 32-bit timestamps for relative * times. This is useful for comparing 32-bit seconds values that can't * be converted to 64-bit values (e.g. due to disk format or wire protocol * issues) when it is known that the times are less than 68 years apart. */ #define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0) #define time_before32(b, a) time_after32(a, b) /** * time_between32 - check if a 32-bit timestamp is within a given time range * @t: the time which may be within [l,h] * @l: the lower bound of the range * @h: the higher bound of the range * * time_before32(t, l, h) returns true if @l <= @t <= @h. All operands are * treated as 32-bit integers. * * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)). 
*/ #define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) # include <vdso/time.h> #endif |
49 49 59 13 49 212 212 200 9706 9714 907 7146 6408 9652 6434 724 720 8745 5995 349 340 2036 4215 4 4 4 4 1 1 9 9 4 4 1 9 9 3 3 3 3 1 1 1 3 1 1 1 15 15 166 169 3 8 8 9 1 1 8 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 
983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 
1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> #include <linux/highuid.h> #include <linux/cred.h> #include <linux/securebits.h> #include <linux/security.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <keys/user-type.h> #include <linux/seq_file.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/ctype.h> #include <linux/projid.h> #include <linux/fs_struct.h> #include <linux/bsearch.h> #include <linux/sort.h> static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); static void free_user_ns(struct work_struct *work); static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid) { return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES); } static void dec_user_namespaces(struct ucounts *ucounts) { return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES); } static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) { /* Start with the same capabilities as init but useless for doing * anything as the capabilities are bound to the new user namespace. 
*/ cred->securebits = SECUREBITS_DEFAULT; cred->cap_inheritable = CAP_EMPTY_SET; cred->cap_permitted = CAP_FULL_SET; cred->cap_effective = CAP_FULL_SET; cred->cap_ambient = CAP_EMPTY_SET; cred->cap_bset = CAP_FULL_SET; #ifdef CONFIG_KEYS key_put(cred->request_key_auth); cred->request_key_auth = NULL; #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ cred->user_ns = user_ns; } static unsigned long enforced_nproc_rlimit(void) { unsigned long limit = RLIM_INFINITY; /* Is RLIMIT_NPROC currently enforced? */ if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) || (current_user_ns() != &init_user_ns)) limit = rlimit(RLIMIT_NPROC); return limit; } /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the * new namespace. * * This is called by copy_creds(), which will finish setting the target task's * credentials. */ int create_user_ns(struct cred *new) { struct user_namespace *ns, *parent_ns = new->user_ns; kuid_t owner = new->euid; kgid_t group = new->egid; struct ucounts *ucounts; int ret, i; ret = -ENOSPC; if (parent_ns->level > 32) goto fail; ucounts = inc_user_namespaces(parent_ns, owner); if (!ucounts) goto fail; /* * Verify that we can not violate the policy of which files * may be accessed that is specified by the root directory, * by verifying that the root directory is at the root of the * mount namespace which allows all files to be accessed. */ ret = -EPERM; if (current_chrooted()) goto fail_dec; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. 
*/ ret = -EPERM; if (!kuid_has_mapping(parent_ns, owner) || !kgid_has_mapping(parent_ns, group)) goto fail_dec; ret = security_create_user_ns(new); if (ret < 0) goto fail_dec; ret = -ENOMEM; ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) goto fail_dec; ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP); ret = ns_alloc_inum(&ns->ns); if (ret) goto fail_free; ns->ns.ops = &userns_operations; refcount_set(&ns->ns.count, 1); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; INIT_WORK(&ns->work, free_user_ns); for (i = 0; i < UCOUNT_COUNTS; i++) { ns->ucount_max[i] = INT_MAX; } set_userns_rlimit_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit()); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK)); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ mutex_lock(&userns_state_mutex); ns->flags = parent_ns->flags; mutex_unlock(&userns_state_mutex); #ifdef CONFIG_KEYS INIT_LIST_HEAD(&ns->keyring_name_list); init_rwsem(&ns->keyring_sem); #endif ret = -ENOMEM; if (!setup_userns_sysctls(ns)) goto fail_keyring; set_cred_user_ns(new, ns); return 0; fail_keyring: #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif ns_free_inum(&ns->ns); fail_free: kmem_cache_free(user_ns_cachep, ns); fail_dec: dec_user_namespaces(ucounts); fail: return ret; } int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { struct cred *cred; int err = -ENOMEM; if (!(unshare_flags & CLONE_NEWUSER)) return 0; cred = prepare_creds(); if (cred) { err = create_user_ns(cred); if (err) put_cred(cred); else *new_cred = cred; } return err; } static void free_user_ns(struct work_struct *work) { struct user_namespace 
*parent, *ns = container_of(work, struct user_namespace, work); do { struct ucounts *ucounts = ns->ucounts; parent = ns->parent; if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->gid_map.forward); kfree(ns->gid_map.reverse); } if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->uid_map.forward); kfree(ns->uid_map.reverse); } if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->projid_map.forward); kfree(ns->projid_map.reverse); } #if IS_ENABLED(CONFIG_BINFMT_MISC) kfree(ns->binfmt_misc); #endif retire_userns_sysctls(ns); key_free_user_ns(ns); ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); dec_user_namespaces(ucounts); ns = parent; } while (refcount_dec_and_test(&parent->ns.count)); } void __put_user_ns(struct user_namespace *ns) { schedule_work(&ns->work); } EXPORT_SYMBOL(__put_user_ns); /** * struct idmap_key - holds the information necessary to find an idmapping in a * sorted idmap array. It is passed to cmp_map_id() as first argument. */ struct idmap_key { bool map_up; /* true -> id from kid; false -> kid from id */ u32 id; /* id to find */ u32 count; /* == 0 unless used with map_id_range_down() */ }; /** * cmp_map_id - Function to be passed to bsearch() to find the requested * idmapping. Expects struct idmap_key to be passed via @k. */ static int cmp_map_id(const void *k, const void *e) { u32 first, last, id2; const struct idmap_key *key = k; const struct uid_gid_extent *el = e; id2 = key->id + key->count - 1; /* handle map_id_{down,up}() */ if (key->map_up) first = el->lower_first; else first = el->first; last = first + el->count - 1; if (key->id >= first && key->id <= last && (id2 >= first && id2 <= last)) return 0; if (key->id < first || id2 < first) return -1; return 1; } /** * map_id_range_down_max - Find idmap via binary search in ordered idmap array. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. 
*/ static struct uid_gid_extent * map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { struct idmap_key key; key.map_up = false; key.count = count; key.id = id; return bsearch(&key, map->forward, extents, sizeof(struct uid_gid_extent), cmp_map_id); } /** * map_id_range_down_base - Find idmap via binary search in static extent array. * Can only be called if number of mappings is equal or less than * UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { unsigned idx; u32 first, last, id2; id2 = id + count - 1; /* Find the matching extent */ for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; if (id >= first && id <= last && (id2 >= first && id2 <= last)) return &map->extent[idx]; } return NULL; } static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) { struct uid_gid_extent *extent; unsigned extents = map->nr_extents; smp_rmb(); if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) extent = map_id_range_down_base(extents, map, id, count); else extent = map_id_range_down_max(extents, map, id, count); /* Map the id or note failure */ if (extent) id = (id - extent->first) + extent->lower_first; else id = (u32) -1; return id; } static u32 map_id_down(struct uid_gid_map *map, u32 id) { return map_id_range_down(map, id, 1); } /** * map_id_up_base - Find idmap via binary search in static extent array. * Can only be called if number of mappings is equal or less than * UID_GID_MAP_MAX_BASE_EXTENTS. 
*/ static struct uid_gid_extent * map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) { unsigned idx; u32 first, last; /* Find the matching extent */ for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; if (id >= first && id <= last) return &map->extent[idx]; } return NULL; } /** * map_id_up_max - Find idmap via binary search in ordered idmap array. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id) { struct idmap_key key; key.map_up = true; key.count = 1; key.id = id; return bsearch(&key, map->reverse, extents, sizeof(struct uid_gid_extent), cmp_map_id); } static u32 map_id_up(struct uid_gid_map *map, u32 id) { struct uid_gid_extent *extent; unsigned extents = map->nr_extents; smp_rmb(); if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) extent = map_id_up_base(extents, map, id); else extent = map_id_up_max(extents, map, id); /* Map the id or note failure */ if (extent) id = (id - extent->lower_first) + extent->first; else id = (u32) -1; return id; } /** * make_kuid - Map a user-namespace uid pair into a kuid. * @ns: User namespace that the uid is in * @uid: User identifier * * Maps a user-namespace uid pair into a kernel internal kuid, * and returns that kuid. * * When there is no mapping defined for the user-namespace uid * pair INVALID_UID is returned. Callers are expected to test * for and handle INVALID_UID being returned. INVALID_UID * may be tested for using uid_valid(). */ kuid_t make_kuid(struct user_namespace *ns, uid_t uid) { /* Map the uid to a global kernel uid */ return KUIDT_INIT(map_id_down(&ns->uid_map, uid)); } EXPORT_SYMBOL(make_kuid); /** * from_kuid - Create a uid from a kuid user-namespace pair. * @targ: The user namespace we want a uid in. * @kuid: The kernel internal uid to start with. 
* * Map @kuid into the user-namespace specified by @targ and * return the resulting uid. * * There is always a mapping into the initial user_namespace. * * If @kuid has no mapping in @targ (uid_t)-1 is returned. */ uid_t from_kuid(struct user_namespace *targ, kuid_t kuid) { /* Map the uid from a global kernel uid */ return map_id_up(&targ->uid_map, __kuid_val(kuid)); } EXPORT_SYMBOL(from_kuid); /** * from_kuid_munged - Create a uid from a kuid user-namespace pair. * @targ: The user namespace we want a uid in. * @kuid: The kernel internal uid to start with. * * Map @kuid into the user-namespace specified by @targ and * return the resulting uid. * * There is always a mapping into the initial user_namespace. * * Unlike from_kuid from_kuid_munged never fails and always * returns a valid uid. This makes from_kuid_munged appropriate * for use in syscalls like stat and getuid where failing the * system call and failing to provide a valid uid are not an * options. * * If @kuid has no mapping in @targ overflowuid is returned. */ uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid) { uid_t uid; uid = from_kuid(targ, kuid); if (uid == (uid_t) -1) uid = overflowuid; return uid; } EXPORT_SYMBOL(from_kuid_munged); /** * make_kgid - Map a user-namespace gid pair into a kgid. * @ns: User namespace that the gid is in * @gid: group identifier * * Maps a user-namespace gid pair into a kernel internal kgid, * and returns that kgid. * * When there is no mapping defined for the user-namespace gid * pair INVALID_GID is returned. Callers are expected to test * for and handle INVALID_GID being returned. INVALID_GID may be * tested for using gid_valid(). */ kgid_t make_kgid(struct user_namespace *ns, gid_t gid) { /* Map the gid to a global kernel gid */ return KGIDT_INIT(map_id_down(&ns->gid_map, gid)); } EXPORT_SYMBOL(make_kgid); /** * from_kgid - Create a gid from a kgid user-namespace pair. * @targ: The user namespace we want a gid in. 
* @kgid: The kernel internal gid to start with. * * Map @kgid into the user-namespace specified by @targ and * return the resulting gid. * * There is always a mapping into the initial user_namespace. * * If @kgid has no mapping in @targ (gid_t)-1 is returned. */ gid_t from_kgid(struct user_namespace *targ, kgid_t kgid) { /* Map the gid from a global kernel gid */ return map_id_up(&targ->gid_map, __kgid_val(kgid)); } EXPORT_SYMBOL(from_kgid); /** * from_kgid_munged - Create a gid from a kgid user-namespace pair. * @targ: The user namespace we want a gid in. * @kgid: The kernel internal gid to start with. * * Map @kgid into the user-namespace specified by @targ and * return the resulting gid. * * There is always a mapping into the initial user_namespace. * * Unlike from_kgid from_kgid_munged never fails and always * returns a valid gid. This makes from_kgid_munged appropriate * for use in syscalls like stat and getgid where failing the * system call and failing to provide a valid gid are not options. * * If @kgid has no mapping in @targ overflowgid is returned. */ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) { gid_t gid; gid = from_kgid(targ, kgid); if (gid == (gid_t) -1) gid = overflowgid; return gid; } EXPORT_SYMBOL(from_kgid_munged); /** * make_kprojid - Map a user-namespace projid pair into a kprojid. * @ns: User namespace that the projid is in * @projid: Project identifier * * Maps a user-namespace uid pair into a kernel internal kuid, * and returns that kuid. * * When there is no mapping defined for the user-namespace projid * pair INVALID_PROJID is returned. Callers are expected to test * for and handle INVALID_PROJID being returned. INVALID_PROJID * may be tested for using projid_valid(). 
*/ kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) { /* Map the uid to a global kernel uid */ return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); } EXPORT_SYMBOL(make_kprojid); /** * from_kprojid - Create a projid from a kprojid user-namespace pair. * @targ: The user namespace we want a projid in. * @kprojid: The kernel internal project identifier to start with. * * Map @kprojid into the user-namespace specified by @targ and * return the resulting projid. * * There is always a mapping into the initial user_namespace. * * If @kprojid has no mapping in @targ (projid_t)-1 is returned. */ projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) { /* Map the uid from a global kernel uid */ return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); } EXPORT_SYMBOL(from_kprojid); /** * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. * @targ: The user namespace we want a projid in. * @kprojid: The kernel internal projid to start with. * * Map @kprojid into the user-namespace specified by @targ and * return the resulting projid. * * There is always a mapping into the initial user_namespace. * * Unlike from_kprojid from_kprojid_munged never fails and always * returns a valid projid. This makes from_kprojid_munged * appropriate for use in syscalls like stat and where * failing the system call and failing to provide a valid projid are * not an options. * * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. 
*/ projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) { projid_t projid; projid = from_kprojid(targ, kprojid); if (projid == (projid_t) -1) projid = OVERFLOW_PROJID; return projid; } EXPORT_SYMBOL(from_kprojid_munged); static int uid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; struct uid_gid_extent *extent = v; struct user_namespace *lower_ns; uid_t lower; lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first)); seq_printf(seq, "%10u %10u %10u\n", extent->first, lower, extent->count); return 0; } static int gid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; struct uid_gid_extent *extent = v; struct user_namespace *lower_ns; gid_t lower; lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first)); seq_printf(seq, "%10u %10u %10u\n", extent->first, lower, extent->count); return 0; } static int projid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; struct uid_gid_extent *extent = v; struct user_namespace *lower_ns; projid_t lower; lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); seq_printf(seq, "%10u %10u %10u\n", extent->first, lower, extent->count); return 0; } static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) { loff_t pos = *ppos; unsigned extents = map->nr_extents; smp_rmb(); if (pos >= extents) return NULL; if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) return &map->extent[pos]; return &map->forward[pos]; } static void *uid_m_start(struct seq_file *seq, loff_t *ppos) { struct user_namespace *ns = seq->private; return m_start(seq, ppos, &ns->uid_map); } static void *gid_m_start(struct 
seq_file *seq, loff_t *ppos) { struct user_namespace *ns = seq->private; return m_start(seq, ppos, &ns->gid_map); } static void *projid_m_start(struct seq_file *seq, loff_t *ppos) { struct user_namespace *ns = seq->private; return m_start(seq, ppos, &ns->projid_map); } static void *m_next(struct seq_file *seq, void *v, loff_t *pos) { (*pos)++; return seq->op->start(seq, pos); } static void m_stop(struct seq_file *seq, void *v) { return; } const struct seq_operations proc_uid_seq_operations = { .start = uid_m_start, .stop = m_stop, .next = m_next, .show = uid_m_show, }; const struct seq_operations proc_gid_seq_operations = { .start = gid_m_start, .stop = m_stop, .next = m_next, .show = gid_m_show, }; const struct seq_operations proc_projid_seq_operations = { .start = projid_m_start, .stop = m_stop, .next = m_next, .show = projid_m_show, }; static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent) { u32 upper_first, lower_first, upper_last, lower_last; unsigned idx; upper_first = extent->first; lower_first = extent->lower_first; upper_last = upper_first + extent->count - 1; lower_last = lower_first + extent->count - 1; for (idx = 0; idx < new_map->nr_extents; idx++) { u32 prev_upper_first, prev_lower_first; u32 prev_upper_last, prev_lower_last; struct uid_gid_extent *prev; if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) prev = &new_map->extent[idx]; else prev = &new_map->forward[idx]; prev_upper_first = prev->first; prev_lower_first = prev->lower_first; prev_upper_last = prev_upper_first + prev->count - 1; prev_lower_last = prev_lower_first + prev->count - 1; /* Does the upper range intersect a previous extent? */ if ((prev_upper_first <= upper_last) && (prev_upper_last >= upper_first)) return true; /* Does the lower range intersect a previous extent? 
*/ if ((prev_lower_first <= lower_last) && (prev_lower_last >= lower_first)) return true; } return false; } /** * insert_extent - Safely insert a new idmap extent into struct uid_gid_map. * Takes care to allocate a 4K block of memory if the number of mappings exceeds * UID_GID_MAP_MAX_BASE_EXTENTS. */ static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent) { struct uid_gid_extent *dest; if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) { struct uid_gid_extent *forward; /* Allocate memory for 340 mappings. */ forward = kmalloc_array(UID_GID_MAP_MAX_EXTENTS, sizeof(struct uid_gid_extent), GFP_KERNEL); if (!forward) return -ENOMEM; /* Copy over memory. Only set up memory for the forward pointer. * Defer the memory setup for the reverse pointer. */ memcpy(forward, map->extent, map->nr_extents * sizeof(map->extent[0])); map->forward = forward; map->reverse = NULL; } if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS) dest = &map->extent[map->nr_extents]; else dest = &map->forward[map->nr_extents]; *dest = *extent; map->nr_extents++; return 0; } /* cmp function to sort() forward mappings */ static int cmp_extents_forward(const void *a, const void *b) { const struct uid_gid_extent *e1 = a; const struct uid_gid_extent *e2 = b; if (e1->first < e2->first) return -1; if (e1->first > e2->first) return 1; return 0; } /* cmp function to sort() reverse mappings */ static int cmp_extents_reverse(const void *a, const void *b) { const struct uid_gid_extent *e1 = a; const struct uid_gid_extent *e2 = b; if (e1->lower_first < e2->lower_first) return -1; if (e1->lower_first > e2->lower_first) return 1; return 0; } /** * sort_idmaps - Sorts an array of idmap entries. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. */ static int sort_idmaps(struct uid_gid_map *map) { if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) return 0; /* Sort forward array. 
*/ sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent), cmp_extents_forward, NULL); /* Only copy the memory from forward we actually need. */ map->reverse = kmemdup(map->forward, map->nr_extents * sizeof(struct uid_gid_extent), GFP_KERNEL); if (!map->reverse) return -ENOMEM; /* Sort reverse array. */ sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent), cmp_extents_reverse, NULL); return 0; } /** * verify_root_map() - check the uid 0 mapping * @file: idmapping file * @map_ns: user namespace of the target process * @new_map: requested idmap * * If a process requests mapping parent uid 0 into the new ns, verify that the * process writing the map had the CAP_SETFCAP capability as the target process * will be able to write fscaps that are valid in ancestor user namespaces. * * Return: true if the mapping is allowed, false if not. */ static bool verify_root_map(const struct file *file, struct user_namespace *map_ns, struct uid_gid_map *new_map) { int idx; const struct user_namespace *file_ns = file->f_cred->user_ns; struct uid_gid_extent *extent0 = NULL; for (idx = 0; idx < new_map->nr_extents; idx++) { if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) extent0 = &new_map->extent[idx]; else extent0 = &new_map->forward[idx]; if (extent0->lower_first == 0) break; extent0 = NULL; } if (!extent0) return true; if (map_ns == file_ns) { /* The process unshared its ns and is writing to its own * /proc/self/uid_map. User already has full capabilites in * the new namespace. Verify that the parent had CAP_SETFCAP * when it unshared. * */ if (!file_ns->parent_could_setfcap) return false; } else { /* Process p1 is writing to uid_map of p2, who is in a child * user namespace to p1's. 
Verify that the opener of the map * file has CAP_SETFCAP against the parent of the new map * namespace */ if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP)) return false; } return true; } static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, struct uid_gid_map *map, struct uid_gid_map *parent_map) { struct seq_file *seq = file->private_data; struct user_namespace *map_ns = seq->private; struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent extent; char *kbuf = NULL, *pos, *next_line; ssize_t ret; /* Only allow < page size writes at the beginning of the file */ if ((*ppos != 0) || (count >= PAGE_SIZE)) return -EINVAL; /* Slurp in the user data */ kbuf = memdup_user_nul(buf, count); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); /* * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * * An id map fits within 1 cache line on most architectures. * * On read nothing needs to be done unless you are on an * architecture with a crazy cache coherency model like alpha. * * There is a one time data dependency between reading the * count of the extents and the values of the extents. The * desired behavior is to see the values of the extents that * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. */ mutex_lock(&userns_state_mutex); memset(&new_map, 0, sizeof(struct uid_gid_map)); ret = -EPERM; /* Only allow one successful write to the map */ if (map->nr_extents != 0) goto out; /* * Adjusting namespace settings requires capabilities on the target. 
*/ if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN)) goto out; /* Parse the user data */ ret = -EINVAL; pos = kbuf; for (; pos; pos = next_line) { /* Find the end of line and ensure I don't look past it */ next_line = strchr(pos, '\n'); if (next_line) { *next_line = '\0'; next_line++; if (*next_line == '\0') next_line = NULL; } pos = skip_spaces(pos); extent.first = simple_strtoul(pos, &pos, 10); if (!isspace(*pos)) goto out; pos = skip_spaces(pos); extent.lower_first = simple_strtoul(pos, &pos, 10); if (!isspace(*pos)) goto out; pos = skip_spaces(pos); extent.count = simple_strtoul(pos, &pos, 10); if (*pos && !isspace(*pos)) goto out; /* Verify there is not trailing junk on the line */ pos = skip_spaces(pos); if (*pos != '\0') goto out; /* Verify we have been given valid starting values */ if ((extent.first == (u32) -1) || (extent.lower_first == (u32) -1)) goto out; /* Verify count is not zero and does not cause the * extent to wrap */ if ((extent.first + extent.count) <= extent.first) goto out; if ((extent.lower_first + extent.count) <= extent.lower_first) goto out; /* Do the ranges in extent overlap any previous extents? */ if (mappings_overlap(&new_map, &extent)) goto out; if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS && (next_line != NULL)) goto out; ret = insert_extent(&new_map, &extent); if (ret < 0) goto out; ret = -EINVAL; } /* Be very certain the new map actually exists */ if (new_map.nr_extents == 0) goto out; ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map)) goto out; ret = -EPERM; /* Map the lower ids from the parent user namespace to the * kernel global id space. 
*/ for (idx = 0; idx < new_map.nr_extents; idx++) { struct uid_gid_extent *e; u32 lower_first; if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) e = &new_map.extent[idx]; else e = &new_map.forward[idx]; lower_first = map_id_range_down(parent_map, e->lower_first, e->count); /* Fail if we can not map the specified extent to * the kernel global id space. */ if (lower_first == (u32) -1) goto out; e->lower_first = lower_first; } /* * If we want to use binary search for lookup, this clones the extent * array and sorts both copies. */ ret = sort_idmaps(&new_map); if (ret < 0) goto out; /* Install the map */ if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) { memcpy(map->extent, new_map.extent, new_map.nr_extents * sizeof(new_map.extent[0])); } else { map->forward = new_map.forward; map->reverse = new_map.reverse; } smp_wmb(); map->nr_extents = new_map.nr_extents; *ppos = count; ret = count; out: if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(new_map.forward); kfree(new_map.reverse); map->forward = NULL; map->reverse = NULL; map->nr_extents = 0; } mutex_unlock(&userns_state_mutex); kfree(kbuf); return ret; } ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; if ((seq_ns != ns) && (seq_ns != ns->parent)) return -EPERM; return map_write(file, buf, size, ppos, CAP_SETUID, &ns->uid_map, &ns->parent->uid_map); } ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; if ((seq_ns != ns) && (seq_ns != ns->parent)) return -EPERM; return map_write(file, buf, size, ppos, CAP_SETGID, &ns->gid_map, &ns->parent->gid_map); } 
ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; if ((seq_ns != ns) && (seq_ns != ns->parent)) return -EPERM; /* Anyone can set any valid project id no capability needed */ return map_write(file, buf, size, ppos, -1, &ns->projid_map, &ns->parent->projid_map); } static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { const struct cred *cred = file->f_cred; if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map)) return false; /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); if (uid_eq(uid, cred->euid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && gid_eq(gid, cred->egid)) return true; } } /* Allow anyone to set a mapping that doesn't require privilege */ if (!cap_valid(cap_setid)) return true; /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. * And the opener of the id file also has the appropriate capability. */ if (ns_capable(ns->parent, cap_setid) && file_ns_capable(file, ns->parent, cap_setid)) return true; return false; } int proc_setgroups_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; unsigned long userns_flags = READ_ONCE(ns->flags); seq_printf(seq, "%s\n", (userns_flags & USERNS_SETGROUPS_ALLOWED) ? 
"allow" : "deny"); return 0; } ssize_t proc_setgroups_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; char kbuf[8], *pos; bool setgroups_allowed; ssize_t ret; /* Only allow a very narrow range of strings to be written */ ret = -EINVAL; if ((*ppos != 0) || (count >= sizeof(kbuf))) goto out; /* What was written? */ ret = -EFAULT; if (copy_from_user(kbuf, buf, count)) goto out; kbuf[count] = '\0'; pos = kbuf; /* What is being requested? */ ret = -EINVAL; if (strncmp(pos, "allow", 5) == 0) { pos += 5; setgroups_allowed = true; } else if (strncmp(pos, "deny", 4) == 0) { pos += 4; setgroups_allowed = false; } else goto out; /* Verify there is not trailing junk on the line */ pos = skip_spaces(pos); if (*pos != '\0') goto out; ret = -EPERM; mutex_lock(&userns_state_mutex); if (setgroups_allowed) { /* Enabling setgroups after setgroups has been disabled * is not allowed. */ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) goto out_unlock; } else { /* Permanently disabling setgroups after setgroups has * been enabled by writing the gid_map is not allowed. */ if (ns->gid_map.nr_extents != 0) goto out_unlock; ns->flags &= ~USERNS_SETGROUPS_ALLOWED; } mutex_unlock(&userns_state_mutex); /* Report a successful write */ *ppos = count; ret = count; out: return ret; out_unlock: mutex_unlock(&userns_state_mutex); goto out; } bool userns_may_setgroups(const struct user_namespace *ns) { bool allowed; mutex_lock(&userns_state_mutex); /* It is not safe to use setgroups until a gid mapping in * the user namespace has been established. */ allowed = ns->gid_map.nr_extents != 0; /* Is setgroups allowed? */ allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); mutex_unlock(&userns_state_mutex); return allowed; } /* * Returns true if @child is the same namespace or a descendant of * @ancestor. 
*/ bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child) { const struct user_namespace *ns; for (ns = child; ns->level > ancestor->level; ns = ns->parent) ; return (ns == ancestor); } bool current_in_userns(const struct user_namespace *target_ns) { return in_userns(target_ns, current_user_ns()); } EXPORT_SYMBOL(current_in_userns); static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); } static struct ns_common *userns_get(struct task_struct *task) { struct user_namespace *user_ns; rcu_read_lock(); user_ns = get_user_ns(__task_cred(task)->user_ns); rcu_read_unlock(); return user_ns ? &user_ns->ns : NULL; } static void userns_put(struct ns_common *ns) { put_user_ns(to_user_ns(ns)); } static int userns_install(struct nsset *nsset, struct ns_common *ns) { struct user_namespace *user_ns = to_user_ns(ns); struct cred *cred; /* Don't allow gaining capabilities by reentering * the same user namespace. 
*/ if (user_ns == current_user_ns()) return -EINVAL; /* Tasks that share a thread group must share a user namespace */ if (!thread_group_empty(current)) return -EINVAL; if (current->fs->users != 1) return -EINVAL; if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; cred = nsset_cred(nsset); if (!cred) return -EINVAL; put_user_ns(cred->user_ns); set_cred_user_ns(cred, get_user_ns(user_ns)); if (set_cred_ucounts(cred) < 0) return -EINVAL; return 0; } struct ns_common *ns_get_owner(struct ns_common *ns) { struct user_namespace *my_user_ns = current_user_ns(); struct user_namespace *owner, *p; /* See if the owner is in the current user namespace */ owner = p = ns->ops->owner(ns); for (;;) { if (!p) return ERR_PTR(-EPERM); if (p == my_user_ns) break; p = p->parent; } return &get_user_ns(owner)->ns; } static struct user_namespace *userns_owner(struct ns_common *ns) { return to_user_ns(ns)->parent; } const struct proc_ns_operations userns_operations = { .name = "user", .type = CLONE_NEWUSER, .get = userns_get, .put = userns_put, .install = userns_install, .owner = userns_owner, .get_parent = ns_get_owner, }; static __init int user_namespaces_init(void) { user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT); return 0; } subsys_initcall(user_namespaces_init); |
1 1 4 1 1 51 51 48 49 51 49 51 51 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 | // SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include <trace/events/mmap_lock.h> #include <linux/mm.h> #include <linux/cgroup.h> #include <linux/memcontrol.h> #include <linux/mmap_lock.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/smp.h> #include <linux/trace_events.h> #include <linux/local_lock.h> EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released); #ifdef CONFIG_MEMCG /* * Our various events all share the same buffer (because we don't want or need * to allocate a set of buffers *per event type*), so we need to protect against * concurrent _reg() and _unreg() calls, and count how many _reg() calls have * been made. */ static DEFINE_MUTEX(reg_lock); static int reg_refcount; /* Protected by reg_lock. */ /* * Size of the buffer for memcg path names. Ignoring stack trace support, * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it. 
*/ #define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL /* * How many contexts our trace events might be called in: normal, softirq, irq, * and NMI. */ #define CONTEXT_COUNT 4 struct memcg_path { local_lock_t lock; char __rcu *buf; local_t buf_idx; }; static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = { .lock = INIT_LOCAL_LOCK(lock), .buf_idx = LOCAL_INIT(0), }; static char **tmp_bufs; /* Called with reg_lock held. */ static void free_memcg_path_bufs(void) { struct memcg_path *memcg_path; int cpu; char **old = tmp_bufs; for_each_possible_cpu(cpu) { memcg_path = per_cpu_ptr(&memcg_paths, cpu); *(old++) = rcu_dereference_protected(memcg_path->buf, lockdep_is_held(®_lock)); rcu_assign_pointer(memcg_path->buf, NULL); } /* Wait for inflight memcg_path_buf users to finish. */ synchronize_rcu(); old = tmp_bufs; for_each_possible_cpu(cpu) { kfree(*(old++)); } kfree(tmp_bufs); tmp_bufs = NULL; } int trace_mmap_lock_reg(void) { int cpu; char *new; mutex_lock(®_lock); /* If the refcount is going 0->1, proceed with allocating buffers. */ if (reg_refcount++) goto out; tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs), GFP_KERNEL); if (tmp_bufs == NULL) goto out_fail; for_each_possible_cpu(cpu) { new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL); if (new == NULL) goto out_fail_free; rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new); /* Don't need to wait for inflights, they'd have gotten NULL. */ } out: mutex_unlock(®_lock); return 0; out_fail_free: free_memcg_path_bufs(); out_fail: /* Since we failed, undo the earlier ref increment. */ --reg_refcount; mutex_unlock(®_lock); return -ENOMEM; } void trace_mmap_lock_unreg(void) { mutex_lock(®_lock); /* If the refcount is going 1->0, proceed with freeing buffers. 
*/ if (--reg_refcount) goto out; free_memcg_path_bufs(); out: mutex_unlock(®_lock); } static inline char *get_memcg_path_buf(void) { struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths); char *buf; int idx; rcu_read_lock(); buf = rcu_dereference(memcg_path->buf); if (buf == NULL) { rcu_read_unlock(); return NULL; } idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) - MEMCG_PATH_BUF_SIZE; return &buf[idx]; } static inline void put_memcg_path_buf(void) { local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx); rcu_read_unlock(); } #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ do { \ const char *memcg_path; \ local_lock(&memcg_paths.lock); \ memcg_path = get_mm_memcg_path(mm); \ trace_mmap_lock_##type(mm, \ memcg_path != NULL ? memcg_path : "", \ ##__VA_ARGS__); \ if (likely(memcg_path != NULL)) \ put_memcg_path_buf(); \ local_unlock(&memcg_paths.lock); \ } while (0) #else /* !CONFIG_MEMCG */ int trace_mmap_lock_reg(void) { return 0; } void trace_mmap_lock_unreg(void) { } #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ trace_mmap_lock_##type(mm, "", ##__VA_ARGS__) #endif /* CONFIG_MEMCG */ #ifdef CONFIG_TRACING #ifdef CONFIG_MEMCG /* * Write the given mm_struct's memcg path to a percpu buffer, and return a * pointer to it. If the path cannot be determined, or no buffer was available * (because the trace event is being unregistered), NULL is returned. * * Note: buffers are allocated per-cpu to avoid locking, so preemption must be * disabled by the caller before calling us, and re-enabled only after the * caller is done with the pointer. * * The caller must call put_memcg_path_buf() once the buffer is no longer * needed. This must be done while preemption is still disabled. 
*/ static const char *get_mm_memcg_path(struct mm_struct *mm) { char *buf = NULL; struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); if (memcg == NULL) goto out; if (unlikely(memcg->css.cgroup == NULL)) goto out_put; buf = get_memcg_path_buf(); if (buf == NULL) goto out_put; cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE); out_put: css_put(&memcg->css); out: return buf; } #endif /* CONFIG_MEMCG */ /* * Trace calls must be in a separate file, as otherwise there's a circular * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h. */ void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) { TRACE_MMAP_LOCK_EVENT(start_locking, mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success); } EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) { TRACE_MMAP_LOCK_EVENT(released, mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_released); #endif /* CONFIG_TRACING */ |
2 22 17 5 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | // SPDX-License-Identifier: GPL-2.0-or-later /* Module signature checker * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/module_signature.h> #include <linux/string.h> #include <linux/verification.h> #include <linux/security.h> #include <crypto/public_key.h> #include <uapi/linux/module.h> #include "internal.h" #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); /* * Export sig_enforce kernel cmdline parameter to allow other subsystems rely * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. */ bool is_module_sig_enforced(void) { return sig_enforce; } EXPORT_SYMBOL(is_module_sig_enforced); void set_module_sig_enforced(void) { sig_enforce = true; } /* * Verify the signature on a module. 
*/ int mod_verify_sig(const void *mod, struct load_info *info) { struct module_signature ms; size_t sig_len, modlen = info->len; int ret; pr_devel("==>%s(,%zu)\n", __func__, modlen); if (modlen <= sizeof(ms)) return -EBADMSG; memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); ret = mod_check_sig(&ms, modlen, "module"); if (ret) return ret; sig_len = be32_to_cpu(ms.sig_len); modlen -= sig_len + sizeof(ms); info->len = modlen; return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, VERIFY_USE_SECONDARY_KEYRING, VERIFYING_MODULE_SIGNATURE, NULL, NULL); } int module_sig_check(struct load_info *info, int flags) { int err = -ENODATA; const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const char *reason; const void *mod = info->hdr; bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | MODULE_INIT_IGNORE_VERMAGIC); /* * Do not allow mangled modules as a module with version information * removed is no longer the module that was signed. */ if (!mangled_module && info->len > markerlen && memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ info->len -= markerlen; err = mod_verify_sig(mod, info); if (!err) { info->sig_ok = true; return 0; } } /* * We don't permit modules to be loaded into the trusted kernels * without a valid signature on them, but if we're not enforcing, * certain errors are non-fatal. */ switch (err) { case -ENODATA: reason = "unsigned module"; break; case -ENOPKG: reason = "module with unsupported crypto"; break; case -ENOKEY: reason = "module with unavailable key"; break; default: /* * All other errors are fatal, including lack of memory, * unparseable signatures, and signature check failures -- * even if signatures aren't required. */ return err; } if (is_module_sig_enforced()) { pr_notice("Loading of %s is rejected\n", reason); return -EKEYREJECTED; } return security_locked_down(LOCKDOWN_MODULE_SIGNATURE); } |
110 109 3 48 17 109 110 17 110 108 49 108 110 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_log_format.h" #include "xfs_bit.h" /* * XFS bit manipulation routines, used in non-realtime code. */ /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary * Returns 1 for empty, 0 for non-empty. */ int xfs_bitmap_empty(uint *map, uint size) { uint i; for (i = 0; i < size; i++) { if (map[i] != 0) return 0; } return 1; } /* * Count the number of contiguous bits set in the bitmap starting with bit * start_bit. Size is the size of the bitmap in words. */ int xfs_contig_bits(uint *map, uint size, uint start_bit) { uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); uint result = 0; uint tmp; size <<= BIT_TO_WORD_SHIFT; ASSERT(start_bit < size); size -= start_bit & ~(NBWORD - 1); start_bit &= (NBWORD - 1); if (start_bit) { tmp = *p++; /* set to one first offset bits prior to start */ tmp |= (~0U >> (NBWORD-start_bit)); if (tmp != ~0U) goto found; result += NBWORD; size -= NBWORD; } while (size) { if ((tmp = *p++) != ~0U) goto found; result += NBWORD; size -= NBWORD; } return result - start_bit; found: return result + ffz(tmp) - start_bit; } /* * This takes the bit number to start looking from and * returns the next set bit from there. It returns -1 * if there are no more bits set or the start bit is * beyond the end of the bitmap. * * Size is the number of words, not bytes, in the bitmap. 
*/ int xfs_next_bit(uint *map, uint size, uint start_bit) { uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); uint result = start_bit & ~(NBWORD - 1); uint tmp; size <<= BIT_TO_WORD_SHIFT; if (start_bit >= size) return -1; size -= result; start_bit &= (NBWORD - 1); if (start_bit) { tmp = *p++; /* set to zero first offset bits prior to start */ tmp &= (~0U << start_bit); if (tmp != 0U) goto found; result += NBWORD; size -= NBWORD; } while (size) { if ((tmp = *p++) != 0U) goto found; result += NBWORD; size -= NBWORD; } return -1; found: return result + ffs(tmp) - 1; } |
2503 2505 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2013 Politecnico di Torino, Italy * TORSEC group -- https://security.polito.it * * Author: Roberto Sassu <roberto.sassu@polito.it> * * File: ima_template.c * Helpers to manage template descriptors. */ #include <linux/rculist.h> #include "ima.h" #include "ima_template_lib.h" enum header_fields { HDR_PCR, HDR_DIGEST, HDR_TEMPLATE_NAME, HDR_TEMPLATE_DATA, HDR__LAST }; static struct ima_template_desc builtin_templates[] = { {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, {.name = "ima-ng", .fmt = "d-ng|n-ng"}, {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"}, {.name = "ima-ngv2", .fmt = "d-ngv2|n-ng"}, {.name = "ima-sigv2", .fmt = "d-ngv2|n-ng|sig"}, {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"}, {.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"}, {.name = "evm-sig", .fmt = "d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode"}, {.name = "", .fmt = ""}, /* placeholder for a custom format */ }; static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); static int template_setup_done; static const struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, .field_show = ima_show_template_digest}, {.field_id = "n", .field_init = ima_eventname_init, .field_show = ima_show_template_string}, {.field_id = "d-ng", .field_init = ima_eventdigest_ng_init, .field_show = ima_show_template_digest_ng}, {.field_id = "d-ngv2", .field_init = ima_eventdigest_ngv2_init, .field_show = ima_show_template_digest_ngv2}, {.field_id = "n-ng", .field_init = ima_eventname_ng_init, .field_show = ima_show_template_string}, {.field_id = "sig", .field_init = ima_eventsig_init, .field_show = ima_show_template_sig}, {.field_id = "buf", .field_init = ima_eventbuf_init, .field_show = ima_show_template_buf}, {.field_id = "d-modsig", .field_init = ima_eventdigest_modsig_init, .field_show = 
ima_show_template_digest_ng}, {.field_id = "modsig", .field_init = ima_eventmodsig_init, .field_show = ima_show_template_sig}, {.field_id = "evmsig", .field_init = ima_eventevmsig_init, .field_show = ima_show_template_sig}, {.field_id = "iuid", .field_init = ima_eventinodeuid_init, .field_show = ima_show_template_uint}, {.field_id = "igid", .field_init = ima_eventinodegid_init, .field_show = ima_show_template_uint}, {.field_id = "imode", .field_init = ima_eventinodemode_init, .field_show = ima_show_template_uint}, {.field_id = "xattrnames", .field_init = ima_eventinodexattrnames_init, .field_show = ima_show_template_string}, {.field_id = "xattrlengths", .field_init = ima_eventinodexattrlengths_init, .field_show = ima_show_template_sig}, {.field_id = "xattrvalues", .field_init = ima_eventinodexattrvalues_init, .field_show = ima_show_template_sig}, }; /* * Used when restoring measurements carried over from a kexec. 'd' and 'n' don't * need to be accounted for since they shouldn't be defined in the same template * description as 'd-ng' and 'n-ng' respectively. */ #define MAX_TEMPLATE_NAME_LEN \ sizeof("d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode") static struct ima_template_desc *ima_template; static struct ima_template_desc *ima_buf_template; /** * ima_template_has_modsig - Check whether template has modsig-related fields. * @ima_template: IMA template to check. * * Tells whether the given template has fields referencing a file's appended * signature. 
*/ bool ima_template_has_modsig(const struct ima_template_desc *ima_template) { int i; for (i = 0; i < ima_template->num_fields; i++) if (!strcmp(ima_template->fields[i]->field_id, "modsig") || !strcmp(ima_template->fields[i]->field_id, "d-modsig")) return true; return false; } static int __init ima_template_setup(char *str) { struct ima_template_desc *template_desc; int template_len = strlen(str); if (template_setup_done) return 1; if (!ima_template) ima_init_template_list(); /* * Verify that a template with the supplied name exists. * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. */ template_desc = lookup_template_desc(str); if (!template_desc) { pr_err("template %s not found, using %s\n", str, CONFIG_IMA_DEFAULT_TEMPLATE); return 1; } /* * Verify whether the current hash algorithm is supported * by the 'ima' template. */ if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 && ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) { pr_err("template does not support hash alg\n"); return 1; } ima_template = template_desc; template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { pr_err("format string '%s' not valid, using template %s\n", str, CONFIG_IMA_DEFAULT_TEMPLATE); return 1; } builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; template_setup_done = 1; return 1; } __setup("ima_template_fmt=", ima_template_fmt_setup); struct ima_template_desc *lookup_template_desc(const char *name) { struct ima_template_desc *template_desc; int found = 0; rcu_read_lock(); list_for_each_entry_rcu(template_desc, &defined_templates, list) { if ((strcmp(template_desc->name, name) == 0) || (strcmp(template_desc->fmt, name) == 0)) { found = 1; break; } } rcu_read_unlock(); return found ? 
template_desc : NULL; } static const struct ima_template_field * lookup_template_field(const char *field_id) { int i; for (i = 0; i < ARRAY_SIZE(supported_fields); i++) if (strncmp(supported_fields[i].field_id, field_id, IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0) return &supported_fields[i]; return NULL; } static int template_fmt_size(const char *template_fmt) { char c; int template_fmt_len = strlen(template_fmt); int i = 0, j = 0; while (i < template_fmt_len) { c = template_fmt[i]; if (c == '|') j++; i++; } return j + 1; } int template_desc_init_fields(const char *template_fmt, const struct ima_template_field ***fields, int *num_fields) { const char *template_fmt_ptr; const struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX]; int template_num_fields; int i, len; if (num_fields && *num_fields > 0) /* already initialized? */ return 0; template_num_fields = template_fmt_size(template_fmt); if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) { pr_err("format string '%s' contains too many fields\n", template_fmt); return -EINVAL; } for (i = 0, template_fmt_ptr = template_fmt; i < template_num_fields; i++, template_fmt_ptr += len + 1) { char tmp_field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN + 1]; len = strchrnul(template_fmt_ptr, '|') - template_fmt_ptr; if (len == 0 || len > IMA_TEMPLATE_FIELD_ID_MAX_LEN) { pr_err("Invalid field with length %d\n", len); return -EINVAL; } memcpy(tmp_field_id, template_fmt_ptr, len); tmp_field_id[len] = '\0'; found_fields[i] = lookup_template_field(tmp_field_id); if (!found_fields[i]) { pr_err("field '%s' not found\n", tmp_field_id); return -ENOENT; } } if (fields && num_fields) { *fields = kmalloc_array(i, sizeof(**fields), GFP_KERNEL); if (*fields == NULL) return -ENOMEM; memcpy(*fields, found_fields, i * sizeof(**fields)); *num_fields = i; } return 0; } void ima_init_template_list(void) { int i; if (!list_empty(&defined_templates)) return; spin_lock(&template_list); for (i = 0; i < ARRAY_SIZE(builtin_templates); i++) { 
list_add_tail_rcu(&builtin_templates[i].list, &defined_templates); } spin_unlock(&template_list); } struct ima_template_desc *ima_template_desc_current(void) { if (!ima_template) { ima_init_template_list(); ima_template = lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); } return ima_template; } struct ima_template_desc *ima_template_desc_buf(void) { if (!ima_buf_template) { ima_init_template_list(); ima_buf_template = lookup_template_desc("ima-buf"); } return ima_buf_template; } int __init ima_init_template(void) { struct ima_template_desc *template = ima_template_desc_current(); int result; result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); if (result < 0) { pr_err("template %s init failed, result: %d\n", (strlen(template->name) ? template->name : template->fmt), result); return result; } template = ima_template_desc_buf(); if (!template) { pr_err("Failed to get ima-buf template\n"); return -EINVAL; } result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); if (result < 0) pr_err("template %s init failed, result: %d\n", (strlen(template->name) ? 
template->name : template->fmt), result); return result; } static struct ima_template_desc *restore_template_fmt(char *template_name) { struct ima_template_desc *template_desc = NULL; int ret; ret = template_desc_init_fields(template_name, NULL, NULL); if (ret < 0) { pr_err("attempting to initialize the template \"%s\" failed\n", template_name); goto out; } template_desc = kzalloc(sizeof(*template_desc), GFP_KERNEL); if (!template_desc) goto out; template_desc->name = ""; template_desc->fmt = kstrdup(template_name, GFP_KERNEL); if (!template_desc->fmt) { kfree(template_desc); template_desc = NULL; goto out; } spin_lock(&template_list); list_add_tail_rcu(&template_desc->list, &defined_templates); spin_unlock(&template_list); out: return template_desc; } static int ima_restore_template_data(struct ima_template_desc *template_desc, void *template_data, int template_data_size, struct ima_template_entry **entry) { struct tpm_digest *digests; int ret = 0; int i; *entry = kzalloc(struct_size(*entry, template_data, template_desc->num_fields), GFP_NOFS); if (!*entry) return -ENOMEM; digests = kcalloc(NR_BANKS(ima_tpm_chip) + ima_extra_slots, sizeof(*digests), GFP_NOFS); if (!digests) { kfree(*entry); return -ENOMEM; } (*entry)->digests = digests; ret = ima_parse_buf(template_data, template_data + template_data_size, NULL, template_desc->num_fields, (*entry)->template_data, NULL, NULL, ENFORCE_FIELDS | ENFORCE_BUFEND, "template data"); if (ret < 0) { kfree((*entry)->digests); kfree(*entry); return ret; } (*entry)->template_desc = template_desc; for (i = 0; i < template_desc->num_fields; i++) { struct ima_field_data *field_data = &(*entry)->template_data[i]; u8 *data = field_data->data; (*entry)->template_data[i].data = kzalloc(field_data->len + 1, GFP_KERNEL); if (!(*entry)->template_data[i].data) { ret = -ENOMEM; break; } memcpy((*entry)->template_data[i].data, data, field_data->len); (*entry)->template_data_len += sizeof(field_data->len); (*entry)->template_data_len += 
field_data->len; } if (ret < 0) { ima_free_template_entry(*entry); *entry = NULL; } return ret; } /* Restore the serialized binary measurement list without extending PCRs. */ int ima_restore_measurement_list(loff_t size, void *buf) { char template_name[MAX_TEMPLATE_NAME_LEN]; unsigned char zero[TPM_DIGEST_SIZE] = { 0 }; struct ima_kexec_hdr *khdr = buf; struct ima_field_data hdr[HDR__LAST] = { [HDR_PCR] = {.len = sizeof(u32)}, [HDR_DIGEST] = {.len = TPM_DIGEST_SIZE}, }; void *bufp = buf + sizeof(*khdr); void *bufendp; struct ima_template_entry *entry; struct ima_template_desc *template_desc; DECLARE_BITMAP(hdr_mask, HDR__LAST); unsigned long count = 0; int ret = 0; if (!buf || size < sizeof(*khdr)) return 0; if (ima_canonical_fmt) { khdr->version = le16_to_cpu((__force __le16)khdr->version); khdr->count = le64_to_cpu((__force __le64)khdr->count); khdr->buffer_size = le64_to_cpu((__force __le64)khdr->buffer_size); } if (khdr->version != 1) { pr_err("attempting to restore a incompatible measurement list"); return -EINVAL; } if (khdr->count > ULONG_MAX - 1) { pr_err("attempting to restore too many measurements"); return -EINVAL; } bitmap_zero(hdr_mask, HDR__LAST); bitmap_set(hdr_mask, HDR_PCR, 1); bitmap_set(hdr_mask, HDR_DIGEST, 1); /* * ima kexec buffer prefix: version, buffer size, count * v1 format: pcr, digest, template-name-len, template-name, * template-data-size, template-data */ bufendp = buf + khdr->buffer_size; while ((bufp < bufendp) && (count++ < khdr->count)) { int enforce_mask = ENFORCE_FIELDS; enforce_mask |= (count == khdr->count) ? 
ENFORCE_BUFEND : 0; ret = ima_parse_buf(bufp, bufendp, &bufp, HDR__LAST, hdr, NULL, hdr_mask, enforce_mask, "entry header"); if (ret < 0) break; if (hdr[HDR_TEMPLATE_NAME].len >= MAX_TEMPLATE_NAME_LEN) { pr_err("attempting to restore a template name that is too long\n"); ret = -EINVAL; break; } /* template name is not null terminated */ memcpy(template_name, hdr[HDR_TEMPLATE_NAME].data, hdr[HDR_TEMPLATE_NAME].len); template_name[hdr[HDR_TEMPLATE_NAME].len] = 0; if (strcmp(template_name, "ima") == 0) { pr_err("attempting to restore an unsupported template \"%s\" failed\n", template_name); ret = -EINVAL; break; } template_desc = lookup_template_desc(template_name); if (!template_desc) { template_desc = restore_template_fmt(template_name); if (!template_desc) break; } /* * Only the running system's template format is initialized * on boot. As needed, initialize the other template formats. */ ret = template_desc_init_fields(template_desc->fmt, &(template_desc->fields), &(template_desc->num_fields)); if (ret < 0) { pr_err("attempting to restore the template fmt \"%s\" failed\n", template_desc->fmt); ret = -EINVAL; break; } ret = ima_restore_template_data(template_desc, hdr[HDR_TEMPLATE_DATA].data, hdr[HDR_TEMPLATE_DATA].len, &entry); if (ret < 0) break; if (memcmp(hdr[HDR_DIGEST].data, zero, sizeof(zero))) { ret = ima_calc_field_array_hash( &entry->template_data[0], entry); if (ret < 0) { pr_err("cannot calculate template digest\n"); ret = -EINVAL; break; } } entry->pcr = !ima_canonical_fmt ? *(u32 *)(hdr[HDR_PCR].data) : le32_to_cpu(*(__le32 *)(hdr[HDR_PCR].data)); ret = ima_restore_measurement_entry(entry); if (ret < 0) break; } return ret; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:ip,mark type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Forceadd support */ /* 2 skbinfo support */ #define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vytas Dauksa 
<vytas.dauksa@smoothwall.net>"); IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,mark"); /* Type specific function prefix */ #define HTYPE hash_ipmark #define IP_SET_HASH_WITH_MARKMASK /* IPv4 variant */ /* Member elements */ struct hash_ipmark4_elem { __be32 ip; __u32 mark; }; /* Common functions */ static bool hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1, const struct hash_ipmark4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->mark == ip2->mark; } static bool hash_ipmark4_data_list(struct sk_buff *skb, const struct hash_ipmark4_elem *data) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipmark4_data_next(struct hash_ipmark4_elem *next, const struct hash_ipmark4_elem *d) { next->ip = d->ip; } #define MTYPE hash_ipmark4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); e.mark = skb->mark; e.mark &= h->markmask; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_MARK))) return 
-IPSET_ERR_PROTOCOL; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; if (e.mark == 0 && e.ip == 0) return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) { if (e.mark == 0 && ip_to == 0) return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++, i++) { e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_ipmark4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } return ret; } /* IPv6 variant */ struct hash_ipmark6_elem { union nf_inet_addr ip; __u32 mark; }; /* Common functions */ static bool hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1, const struct hash_ipmark6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->mark == ip2->mark; } static bool hash_ipmark6_data_list(struct sk_buff *skb, const struct hash_ipmark6_elem *data) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipmark6_data_next(struct hash_ipmark6_elem *next, const struct hash_ipmark6_elem *d) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ipmark6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int 
hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ipmark6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); e.mark = skb->mark; e.mark &= h->markmask; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ipmark6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_MARK))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; if (adt == IPSET_TEST) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; return 0; } static struct ip_set_type hash_ipmark_type __read_mostly = { .name = "hash:ip,mark", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_MARK, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipmark_create, .create_policy = { [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 }, [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_MARK] = { .type = NLA_U32 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ipmark_init(void) { return ip_set_type_register(&hash_ipmark_type); } static void __exit hash_ipmark_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ipmark_type); } module_init(hash_ipmark_init); module_exit(hash_ipmark_fini); |
3024 3024 3110 810 2076 1976 1518 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | // SPDX-License-Identifier: GPL-2.0 /* * linux/lib/kasprintf.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/stdarg.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> /* Simplified asprintf. */ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) { unsigned int first, second; char *p; va_list aq; va_copy(aq, ap); first = vsnprintf(NULL, 0, fmt, aq); va_end(aq); p = kmalloc_track_caller(first+1, gfp); if (!p) return NULL; second = vsnprintf(p, first+1, fmt, ap); WARN(first != second, "different return values (%u and %u) from vsnprintf(\"%s\", ...)", first, second, fmt); return p; } EXPORT_SYMBOL(kvasprintf); /* * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt * (or the sole vararg) points to rodata, we will then save a memory * allocation and string copy. In any case, the return value should be * freed using kfree_const(). */ const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap) { if (!strchr(fmt, '%')) return kstrdup_const(fmt, gfp); if (!strcmp(fmt, "%s")) return kstrdup_const(va_arg(ap, const char*), gfp); return kvasprintf(gfp, fmt, ap); } EXPORT_SYMBOL(kvasprintf_const); char *kasprintf(gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = kvasprintf(gfp, fmt, ap); va_end(ap); return p; } EXPORT_SYMBOL(kasprintf); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | // 
SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:net,iface type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 nomatch flag support added */ /* 2 /0 support added */ /* 3 Counters support added */ /* 4 Comments support added */ /* 5 Forceadd support added */ /* 6 skbinfo support added */ /* 7 interface wildcard support added */ #define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Type specific function prefix */ #define HTYPE hash_netiface #define IP_SET_HASH_WITH_NETS #define IP_SET_HASH_WITH_MULTI #define IP_SET_HASH_WITH_NET0 #define STRSCPY(a, b) strscpy(a, b, IFNAMSIZ) /* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; }; /* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; u8 wildcard; char iface[IFNAMSIZ]; }; /* Common functions */ static bool hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, const struct hash_netiface4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && (ip1->wildcard ? 
strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : strcmp(ip1->iface, ip2->iface) == 0); } static int hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) { elem->ip &= ip_set_netmask(cidr); elem->cidr = cidr; } static bool hash_netiface4_data_list(struct sk_buff *skb, const struct hash_netiface4_elem *data) { u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_netiface4_data_next(struct hash_netiface4_elem *next, const struct hash_netiface4_elem *d) { next->ip = d->ip; } #define MTYPE hash_netiface4 #define HOST_MASK 32 #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static const char *get_physindev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physindev(skb); return dev ? dev->name : NULL; } static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); return dev ? 
dev->name : NULL; } #endif static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->state->dir ? par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if 
(tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) e.wildcard = 1; } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip_to < ip) swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } if (retried) ip = ntohl(h->next.ip); do { i++; e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_netiface4_data_next(&h->next, &e); return -ERANGE; } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } while (ip++ < ip_to); return ret; } /* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; }; struct hash_netiface6_elem { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; u8 wildcard; char iface[IFNAMSIZ]; }; /* Common functions */ static bool hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && (ip1->wildcard ? strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : strcmp(ip1->iface, ip2->iface) == 0); } static int hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem) { return elem->nomatch ? 
-ENOTEMPTY : 1; } static void hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr) { ip6_netmask(&elem->ip, cidr); elem->cidr = cidr; } static bool hash_netiface6_data_list(struct sk_buff *skb, const struct hash_netiface6_elem *data) { u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_netiface6_data_next(struct hash_netiface6_elem *next, const struct hash_netiface6_elem *d) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_netiface6 #define HOST_MASK 128 #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct hash_netiface6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6_netmask(&e.ip, e.cidr); if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? 
get_physindev_name(skb) : get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ip6_netmask(&e.ip, e.cidr); nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) e.wildcard = 1; } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 
0 : ret; } static struct ip_set_type hash_netiface_type __read_mostly = { .name = "hash:net,iface", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_netiface_init(void) { return ip_set_type_register(&hash_netiface_type); } static void __exit hash_netiface_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_netiface_type); } module_init(hash_netiface_init); module_exit(hash_netiface_fini); |
36 720 2905 13 71 48 801 1015 150 798 801 98 1015 150 88 6937 1 6329 2065 4427 2 216 4511 12 87 3856 2 3855 670 2008 22 1 246 606 57 31 744 5548 3213 435 4563 2783 342 2 35 807 2768 2882 186 37 882 2093 52 15 2077 10 184 46 548 63 2029 64 45 113 33 103 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 
1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 
1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 
2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 
2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 
2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 
3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 | /* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
#include <linux/dcache.h>
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/list_lru.h>
#include <linux/llist.h>
#include <linux/radix-tree.h>
#include <linux/xarray.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/pid.h>
#include <linux/bug.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/mm_types.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fcntl.h>
#include <linux/rculist_bl.h>
#include <linux/atomic.h>
#include <linux/shrinker.h>
#include <linux/migrate_mode.h>
#include <linux/uidgid.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
#include <linux/delayed_call.h>
#include <linux/uuid.h>
#include <linux/errseq.h>
#include <linux/ioprio.h>
#include <linux/fs_types.h>
#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <linux/mount.h>
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>

#include <asm/byteorder.h>
#include <uapi/linux/fs.h>

/*
 * Forward declarations: these struct types are only named here, not
 * defined, so this header can refer to them without their full layout.
 */
struct backing_dev_info;
struct bdi_writeback;
struct bio;
struct io_comp_batch;
struct export_operations;
struct fiemap_extent_info;
struct hd_geometry;
struct iovec;
struct kiocb;
struct kobject;
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
struct fscrypt_inode_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
struct fs_context;
struct fs_parameter_spec;
struct fileattr;
struct iomap_ops;

/* Initialisation entry points; all are marked __init. */
extern void __init inode_init(void);
extern void __init inode_init_early(void);
extern void __init files_init(void);
extern void __init files_maxfiles_init(void);

extern unsigned long get_max_files(void);
extern unsigned int sysctl_nr_open;

typedef __kernel_rwf_t rwf_t;

struct buffer_head;
/*
 * get_block_t and dio_iodone_t are function types, not pointer types:
 * a callback of either kind is declared as "get_block_t *fn".
 */
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create);
typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
			ssize_t bytes, void *private);

/* MAY_* bit flags; each constant below is a distinct single bit. */
#define MAY_EXEC		0x00000001
#define MAY_WRITE		0x00000002
#define MAY_READ		0x00000004
#define MAY_APPEND		0x00000008
#define MAY_ACCESS		0x00000010
#define MAY_OPEN		0x00000020
#define MAY_CHDIR		0x00000040
/* called from RCU mode, don't block */
#define MAY_NOT_BLOCK		0x00000080

/*
 * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
 * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
 */

/* file is open for reading */
#define FMODE_READ		((__force fmode_t)0x1)
/* file is open for writing */
#define FMODE_WRITE		((__force fmode_t)0x2)
/* file is seekable */
#define FMODE_LSEEK		((__force fmode_t)0x4)
/* file can be accessed using pread */
#define FMODE_PREAD		((__force fmode_t)0x8)
/* file can be accessed using pwrite */
#define FMODE_PWRITE		((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC		((__force fmode_t)0x20)
/* (bits 0x40, 0x80 and 0x100 are not assigned in this header) */
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH         ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH         ((__force fmode_t)0x400)

/*
 * Don't update ctime and mtime.
*
 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
 */
#define FMODE_NOCMTIME		((__force fmode_t)0x800)

/* Expect random access pattern */
#define FMODE_RANDOM		((__force fmode_t)0x1000)

/* File is huge (eg. /dev/mem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET	((__force fmode_t)0x2000)

/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH		((__force fmode_t)0x4000)

/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS	((__force fmode_t)0x8000)
/* Write access to underlying fs */
#define FMODE_WRITER		((__force fmode_t)0x10000)
/* Has read method(s) */
#define FMODE_CAN_READ          ((__force fmode_t)0x20000)
/* Has write method(s) */
#define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)

/* NOTE(review): the next two flags carry no description in this header. */
#define FMODE_OPENED		((__force fmode_t)0x80000)
#define FMODE_CREATED		((__force fmode_t)0x100000)

/* File is stream-like */
#define FMODE_STREAM		((__force fmode_t)0x200000)

/* File supports DIRECT IO */
#define	FMODE_CAN_ODIRECT	((__force fmode_t)0x400000)

/* NOTE(review): FMODE_NOREUSE carries no description in this header. */
#define	FMODE_NOREUSE		((__force fmode_t)0x800000)

/* File supports non-exclusive O_DIRECT writes from multiple threads */
#define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)

/* File is embedded in backing_file object */
#define FMODE_BACKING		((__force fmode_t)0x2000000)

/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY		((__force fmode_t)0x4000000)

/* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT		((__force fmode_t)0x8000000)

/* File represents mount that needs unmounting */
#define FMODE_NEED_UNMOUNT	((__force fmode_t)0x10000000)

/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)

/* File supports async buffered reads */
#define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)

/* File supports async nowait buffered writes */
#define FMODE_BUF_WASYNC	((__force fmode_t)0x80000000)

/*
 * Attribute flags.  These should be or-ed together to figure out what
 * has been changed!
 */
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
/* (bit 10 is not assigned in this list) */
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)
#define ATTR_TOUCH	(1 << 17)

/*
 * Whiteout is represented by a char device.  The following constants define the
 * mode and device number to use.
 */
#define WHITEOUT_MODE 0
#define WHITEOUT_DEV 0

/*
 * This is the Inode Attributes structure, used for notify_change().  It
 * uses the above definitions as flags, to know which values have changed.
 * Also, in this manner, a Filesystem can look at only the values it cares
 * about.  Basically, these are the attributes that the VFS layer can
 * request to change from the FS layer.
 *
 * Derek Atkins <warlord@MIT.EDU> 94-10-20
 */
struct iattr {
	/* Bitmask of ATTR_* flags telling which of the fields below changed. */
	unsigned int	ia_valid;
	umode_t		ia_mode;
	/*
	 * The two anonymous unions wrap structures with the same member.
	 *
	 * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
	 * are a dedicated type requiring the filesystem to use the dedicated
	 * helpers. Other filesystem can continue to use ia_{g,u}id until they
	 * have been ported.
	 *
	 * They always contain the same value. In other words FS_ALLOW_IDMAP
	 * pass down the same value on idmapped mounts as they would on regular
	 * mounts.
	 */
	union {
		kuid_t		ia_uid;
		vfsuid_t	ia_vfsuid;
	};
	union {
		kgid_t		ia_gid;
		vfsgid_t	ia_vfsgid;
	};
	loff_t		ia_size;
	struct timespec64 ia_atime;
	struct timespec64 ia_mtime;
	struct timespec64 ia_ctime;

	/*
	 * Not an attribute, but an auxiliary info for filesystems wanting to
	 * implement an ftruncate() like method.  NOTE: filesystem should
	 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
	 */
	struct file	*ia_file;
};

/*
 * Includes for diskquotas.
 */
#include <linux/quota.h>

/*
 * Maximum number of layers of fs stack.  Needs to be limited to
 * prevent kernel stack overflow
 */
#define FILESYSTEM_MAX_STACK_DEPTH 2

/**
 * enum positive_aop_returns - aop return codes with specific semantics
 *
 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
 * 			    completed, that the page is still locked, and
 * 			    should be considered active.  The VM uses this hint
 * 			    to return the page to the active list -- it won't
 * 			    be a candidate for writeback again in the near
 * 			    future.  Other callers must be careful to unlock
 * 			    the page if they get this return.  Returned by
 * 			    writepage();
 *
 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
 *  			unlocked it and the page might have been truncated.
 *  			The caller should back up to acquiring a new page and
 *  			trying again.  The aop will be taking reasonable
 *  			precautions not to livelock.  If the caller held a page
 *  			reference, it should drop it before retrying.  Returned
 *  			by read_folio().
 *
 * address_space_operation functions return these large constants to indicate
 * special semantics to the caller.  These are much larger than the bytes in a
 * page to allow for functions that return the number of bytes operated on in a
 * given page.
 */

enum positive_aop_returns {
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	AOP_TRUNCATED_PAGE	= 0x80001,
};

/*
 * oh the beauties of C type declarations.
 */
struct page;
struct address_space;
struct writeback_control;
struct readahead_control;

/*
 * Write life time hint values.
 * Stored in struct inode as u8.
*/ enum rw_hint { WRITE_LIFE_NOT_SET = 0, WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, }; /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC #define IOCB_SYNC (__force int) RWF_SYNC #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) #define IOCB_DIRECT (1 << 17) #define IOCB_WRITE (1 << 18) /* iocb->ki_waitq is valid */ #define IOCB_WAITQ (1 << 19) #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) /* * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the * iocb completion can be passed back to the owner for execution from a safe * context rather than needing to be punted through a workqueue. If this * flag is set, the bio completion handling may set iocb->dio_complete to a * handler function and iocb->private to context information for that handler. * The issuer should call the handler with that context information from task * context to complete the processing of the iocb. Note that while this * provides a task context for the dio_complete() callback, it should only be * used on the completion side for non-IO generating completions. It's fine to * call blocking functions from this callback, but they should not wait for * unrelated IO (like cache flushing, new IO generation, etc). 
*/
#define IOCB_DIO_CALLER_COMP	(1 << 22)

/*
 * for use in trace events
 *
 * A list of { flag, name } initializers, one entry per IOCB_* flag.
 */
#define TRACE_IOCB_STRINGS \
	{ IOCB_HIPRI,		"HIPRI" }, \
	{ IOCB_DSYNC,		"DSYNC" }, \
	{ IOCB_SYNC,		"SYNC" }, \
	{ IOCB_NOWAIT,		"NOWAIT" }, \
	{ IOCB_APPEND,		"APPEND" }, \
	{ IOCB_EVENTFD,		"EVENTFD"}, \
	{ IOCB_DIRECT,		"DIRECT" }, \
	{ IOCB_WRITE,		"WRITE" }, \
	{ IOCB_WAITQ,		"WAITQ" }, \
	{ IOCB_NOIO,		"NOIO" }, \
	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }

struct kiocb {
	struct file		*ki_filp;
	loff_t			ki_pos;
	/* Completion callback; NULL marks the kiocb as synchronous. */
	void (*ki_complete)(struct kiocb *iocb, long ret);
	void			*private;
	int			ki_flags;	/* IOCB_* bits */
	u16			ki_ioprio; /* See linux/ioprio.h */
	union {
		/*
		 * Only used for async buffered reads, where it denotes the
		 * page waitqueue associated with completing the read. Valid
		 * IFF IOCB_WAITQ is set.
		 */
		struct wait_page_queue	*ki_waitq;
		/*
		 * Can be used for O_DIRECT IO, where the completion handling
		 * is punted back to the issuer of the IO. May only be set
		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
		 * must then check for presence of this handler when ki_complete
		 * is invoked. The data passed in to this handler must be
		 * assigned to ->private when dio_complete is assigned.
		 */
		ssize_t (*dio_complete)(void *data);
	};
};

/* Returns true when @kiocb has no ki_complete callback installed. */
static inline bool is_sync_kiocb(struct kiocb *kiocb)
{
	return kiocb->ki_complete == NULL;
}

/*
 * Table of callbacks attached to an address_space through its a_ops
 * pointer.  The extern "empty_aops" declared after the table is an
 * instance of this type.
 */
struct address_space_operations {
	int (*writepage)(struct page *page, struct writeback_control *wbc);
	int (*read_folio)(struct file *, struct folio *);

	/* Write back some dirty pages from this mapping. */
	int (*writepages)(struct address_space *, struct writeback_control *);

	/* Mark a folio dirty.  Return true if this dirtied it */
	bool (*dirty_folio)(struct address_space *, struct folio *);

	void (*readahead)(struct readahead_control *);

	int (*write_begin)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len,
				struct page **pagep, void **fsdata);
	int (*write_end)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata);

	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
	sector_t (*bmap)(struct address_space *, sector_t);
	void (*invalidate_folio) (struct folio *, size_t offset, size_t len);
	bool (*release_folio)(struct folio *, gfp_t);
	void (*free_folio)(struct folio *folio);
	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
	/*
	 * migrate the contents of a folio to the specified target. If
	 * migrate_mode is MIGRATE_ASYNC, it must not block.
	 */
	int (*migrate_folio)(struct address_space *, struct folio *dst,
			struct folio *src, enum migrate_mode);
	int (*launder_folio)(struct folio *);
	bool (*is_partially_uptodate) (struct folio *, size_t from,
			size_t count);
	void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb);
	int (*error_remove_page)(struct address_space *, struct page *);

	/* swapfile support */
	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
				sector_t *span);
	void (*swap_deactivate)(struct file *file);
	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
};

extern const struct address_space_operations empty_aops;

/**
 * struct address_space - Contents of a cacheable, mappable object.
 * @host: Owner, either the inode or the block_device.
 * @i_pages: Cached pages.
 * @invalidate_lock: Guards coherency between page cache contents and
 *   file offset->disk block mappings in the filesystem during invalidates.
 *   It is also used to block modification of page cache contents through
 *   memory mappings.
 * @gfp_mask: Memory allocation flags to use for allocating pages.
* @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
 * @nr_thps: Number of THPs in the pagecache (non-shmem only).
 * @i_mmap: Tree of private and shared mappings.
 * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
 * @nrpages: Number of page entries, protected by the i_pages lock.
 * @writeback_index: Writeback starts here.
 * @a_ops: Methods.
 * @flags: Error bits and flags (AS_*).
 * @wb_err: The most recent error which has occurred.
 * @private_lock: For use by the owner of the address_space.
 * @private_list: For use by the owner of the address_space.
 * @private_data: For use by the owner of the address_space.
 */
struct address_space {
	struct inode		*host;
	/* Entries here carry the PAGECACHE_TAG_* marks defined below. */
	struct xarray		i_pages;
	struct rw_semaphore	invalidate_lock;
	gfp_t			gfp_mask;
	/* Signed counter: a negative value means writable mappings are denied. */
	atomic_t		i_mmap_writable;
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	/* number of thp, only for non-shmem files */
	atomic_t		nr_thps;
#endif
	struct rb_root_cached	i_mmap;
	unsigned long		nrpages;
	pgoff_t			writeback_index;
	const struct address_space_operations *a_ops;
	unsigned long		flags;
	struct rw_semaphore	i_mmap_rwsem;
	errseq_t		wb_err;
	spinlock_t		private_lock;
	struct list_head	private_list;
	void			*private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
	/*
	 * On most architectures that alignment is already the case; but
	 * must be enforced here for CRIS, to let the least significant bit
	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
	 */

/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
#define PAGECACHE_TAG_DIRTY	XA_MARK_0
#define PAGECACHE_TAG_WRITEBACK	XA_MARK_1
#define PAGECACHE_TAG_TOWRITE	XA_MARK_2

/*
 * Returns true if any of the pages in the mapping are marked with the tag.
*/
static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
{
	return xa_marked(&mapping->i_pages, tag);
}

/* Take mapping->i_mmap_rwsem for writing. */
static inline void i_mmap_lock_write(struct address_space *mapping)
{
	down_write(&mapping->i_mmap_rwsem);
}

/* Try to take mapping->i_mmap_rwsem for writing; returns the trylock result. */
static inline int i_mmap_trylock_write(struct address_space *mapping)
{
	return down_write_trylock(&mapping->i_mmap_rwsem);
}

/* Release mapping->i_mmap_rwsem after a write acquisition. */
static inline void i_mmap_unlock_write(struct address_space *mapping)
{
	up_write(&mapping->i_mmap_rwsem);
}

/* Try to take mapping->i_mmap_rwsem for reading; returns the trylock result. */
static inline int i_mmap_trylock_read(struct address_space *mapping)
{
	return down_read_trylock(&mapping->i_mmap_rwsem);
}

/* Take mapping->i_mmap_rwsem for reading. */
static inline void i_mmap_lock_read(struct address_space *mapping)
{
	down_read(&mapping->i_mmap_rwsem);
}

/* Release mapping->i_mmap_rwsem after a read acquisition. */
static inline void i_mmap_unlock_read(struct address_space *mapping)
{
	up_read(&mapping->i_mmap_rwsem);
}

/* Lockdep assertion that mapping->i_mmap_rwsem is held. */
static inline void i_mmap_assert_locked(struct address_space *mapping)
{
	lockdep_assert_held(&mapping->i_mmap_rwsem);
}

/* Lockdep assertion that mapping->i_mmap_rwsem is held for writing. */
static inline void i_mmap_assert_write_locked(struct address_space *mapping)
{
	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
}

/*
 * Might pages of this file be mapped into userspace?
 */
static inline int mapping_mapped(struct address_space *mapping)
{
	return	!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
}

/*
 * Might pages of this file have been modified in userspace?
 * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
 * marks vma as VM_SHARED if it is shared, and the file was opened for
 * writing i.e. vma may be mprotected writable even if now readonly.
 *
 * If i_mmap_writable is negative, no new writable mappings are allowed. You
 * can only deny writable mappings, if none exists right now.
 */
static inline int mapping_writably_mapped(struct address_space *mapping)
{
	return atomic_read(&mapping->i_mmap_writable) > 0;
}

/*
 * Account one more writable mapping.  Returns 0 when the counter was
 * incremented, -EPERM when the increment was refused.
 */
static inline int mapping_map_writable(struct address_space *mapping)
{
	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
		0 : -EPERM;
}

/* Drop one writable-mapping count. */
static inline void mapping_unmap_writable(struct address_space *mapping)
{
	atomic_dec(&mapping->i_mmap_writable);
}

/*
 * Deny further writable mappings by decrementing the counter.  Returns 0
 * when the decrement happened, -EBUSY when it was refused.
 */
static inline int mapping_deny_writable(struct address_space *mapping)
{
	return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
		0 : -EBUSY;
}

/* Undo one mapping_deny_writable() by incrementing the counter. */
static inline void mapping_allow_writable(struct address_space *mapping)
{
	atomic_inc(&mapping->i_mmap_writable);
}

/*
 * Use sequence counter to get consistent i_size on 32-bit processors.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
/* The argument is parenthesized so any pointer expression may be passed. */
#define i_size_ordered_init(inode) seqcount_init(&(inode)->i_size_seqcount)
#else
#define i_size_ordered_init(inode) do { } while (0)
#endif

struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))
/*
 * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
 * cache the ACL.  This also means that ->get_inode_acl() can be called in RCU
 * mode with the LOOKUP_RCU flag.
 */
#define ACL_DONT_CACHE ((void *)(-3))

/*
 * Build a sentinel value from @task by adding one byte to its address
 * (void-pointer arithmetic), which is_uncached_acl() detects via bit 0.
 */
static inline struct posix_acl *
uncached_acl_sentinel(struct task_struct *task)
{
	return (void *)task + 1;
}

/* True when bit 0 of the pointer value @acl is set. */
static inline bool
is_uncached_acl(struct posix_acl *acl)
{
	return (long)acl & 1;
}

#define IOP_FASTPERM	0x0001
#define IOP_LOOKUP	0x0002
#define IOP_NOFOLLOW	0x0004
#define IOP_XATTR	0x0008
#define IOP_DEFAULT_READLINK	0x0010

struct fsnotify_mark_connector;

/*
 * Keep mostly read-only and often accessed (especially for
 * the RCU path lookup and 'stat' data) fields at the beginning
 * of the 'struct inode'
 */
struct inode {
	umode_t			i_mode;
	unsigned short		i_opflags;
	kuid_t			i_uid;
	kgid_t			i_gid;
	unsigned int		i_flags;

#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif

	const struct inode_operations	*i_op;
	struct super_block	*i_sb;
	struct address_space	*i_mapping;

#ifdef CONFIG_SECURITY
	void			*i_security;
#endif

	/* Stat data, not accessed from path walking */
	unsigned long		i_ino;
	/*
	 * Filesystems may only read i_nlink directly.  They shall use the
	 * following functions for modification:
	 *
	 *    (set|clear|inc|drop)_nlink
	 *    inode_(inc|dec)_link_count
	 */
	union {
		const unsigned int i_nlink;
		unsigned int __i_nlink;
	};
	dev_t			i_rdev;
	loff_t			i_size;
	struct timespec64	__i_atime;
	struct timespec64	__i_mtime;
	struct timespec64	__i_ctime; /* use inode_*_ctime accessors! */
	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
	unsigned short          i_bytes;
	u8			i_blkbits;
	u8			i_write_hint;
	blkcnt_t		i_blocks;

#ifdef __NEED_I_SIZE_ORDERED
	seqcount_t		i_size_seqcount;
#endif

	/* Misc */
	unsigned long		i_state;
	struct rw_semaphore	i_rwsem;

	unsigned long		dirtied_when;	/* jiffies of first dirtying */
	unsigned long		dirtied_time_when;

	struct hlist_node	i_hash;
	struct list_head	i_io_list;	/* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */

	/* foreign inode detection, see wbc_detach_inode() */
	int			i_wb_frn_winner;
	u16			i_wb_frn_avg_time;
	u16			i_wb_frn_history;
#endif
	struct list_head	i_lru;		/* inode LRU list */
	struct list_head	i_sb_list;
	struct list_head	i_wb_list;	/* backing dev writeback list */
	union {
		struct hlist_head	i_dentry;
		struct rcu_head		i_rcu;
	};
	atomic64_t		i_version;
	atomic64_t		i_sequence; /* see futex */
	atomic_t		i_count;
	atomic_t		i_dio_count;
	atomic_t		i_writecount;
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
	atomic_t		i_readcount; /* struct files open RO */
#endif
	union {
		const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
		void (*free_inode)(struct inode *);
	};
	struct file_lock_context	*i_flctx;
	struct address_space	i_data;
	struct list_head	i_devices;
	union {
		struct pipe_inode_info	*i_pipe;
		struct cdev		*i_cdev;
		char			*i_link;
		unsigned		i_dir_seq;
	};

	__u32			i_generation;

#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask; /* all events this inode cares about */
	struct fsnotify_mark_connector __rcu	*i_fsnotify_marks;
#endif

#ifdef CONFIG_FS_ENCRYPTION
	struct fscrypt_inode_info	*i_crypt_info;
#endif

#ifdef CONFIG_FS_VERITY
	struct fsverity_info	*i_verity_info;
#endif

	void			*i_private; /* fs or device private pointer */
} __randomize_layout;

struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);

/*
 * Block size in bytes, i.e. 2^i_blkbits.
 *
 * The shift is done on an unsigned 1 so the computation matches the
 * unsigned return type and stays defined for i_blkbits == 31 (a signed
 * "1 << 31" overflows int).
 */
static inline unsigned int i_blocksize(const struct inode *node)
{
	return (1U << node->i_blkbits);
}

/* Returns the result of hlist_unhashed() on the inode's i_hash node. */
static inline int inode_unhashed(struct inode *inode)
{
	return hlist_unhashed(&inode->i_hash);
}

/*
 * __mark_inode_dirty expects inodes to be hashed.  Since we don't
 * want special inodes in the fileset inode space, we make them
 * appear hashed, but do not put on any lists.  hlist_del()
 * will work fine and require no locking.
 */
static inline void inode_fake_hash(struct inode *inode)
{
	hlist_add_fake(&inode->i_hash);
}

/*
 * inode->i_mutex nesting subclasses for the lock validator:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: second non-directory
 * 5: second parent (when locking independent directories in rename)
 *
 * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
 * non-directories at once.
* * The locking order between these classes is * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { I_MUTEX_NORMAL, I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, I_MUTEX_NONDIR2, I_MUTEX_PARENT2, }; static inline void inode_lock(struct inode *inode) { down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); } static inline void inode_lock_shared(struct inode *inode) { down_read(&inode->i_rwsem); } static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { return down_write_trylock(&inode->i_rwsem); } static inline int inode_trylock_shared(struct inode *inode) { return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { down_write_nested(&inode->i_rwsem, subclass); } static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) { down_read_nested(&inode->i_rwsem, subclass); } static inline void filemap_invalidate_lock(struct address_space *mapping) { down_write(&mapping->invalidate_lock); } static inline void filemap_invalidate_unlock(struct address_space *mapping) { up_write(&mapping->invalidate_lock); } static inline void filemap_invalidate_lock_shared(struct address_space *mapping) { down_read(&mapping->invalidate_lock); } static inline int filemap_invalidate_trylock_shared( struct address_space *mapping) { return down_read_trylock(&mapping->invalidate_lock); } static inline void filemap_invalidate_unlock_shared( struct address_space *mapping) { up_read(&mapping->invalidate_lock); } void lock_two_nondirectories(struct inode *, struct inode*); void unlock_two_nondirectories(struct inode *, struct inode*); void filemap_invalidate_lock_two(struct address_space *mapping1, struct 
address_space *mapping2); void filemap_invalidate_unlock_two(struct address_space *mapping1, struct address_space *mapping2); /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic * with respect to the local cpu (unlike with preempt disabled), * but they don't need to be atomic with respect to other cpus like in * true SMP (so they need either to either locally disable irq around * the read or for example on x86 they can be still implemented as a * cmpxchg8b without the need of the lock prefix). For SMP compiles * and 64bit archs it makes no difference if preempt is enabled or not. */ static inline loff_t i_size_read(const struct inode *inode) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) loff_t i_size; unsigned int seq; do { seq = read_seqcount_begin(&inode->i_size_seqcount); i_size = inode->i_size; } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); return i_size; #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) loff_t i_size; preempt_disable(); i_size = inode->i_size; preempt_enable(); return i_size; #else return inode->i_size; #endif } /* * NOTE: unlike i_size_read(), i_size_write() does need locking around it * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount * can be lost, resulting in subsequent i_size_read() calls spinning forever. 
*/
static inline void i_size_write(struct inode *inode, loff_t i_size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	/*
	 * 32-bit SMP: the store is wrapped in a seqcount write section so
	 * that i_size_read() can notice a concurrent update and retry.
	 * Preemption stays disabled for the whole write section.
	 */
	preempt_disable();
	write_seqcount_begin(&inode->i_size_seqcount);
	inode->i_size = i_size;
	write_seqcount_end(&inode->i_size_seqcount);
	preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	/* 32-bit preemptible UP: only preemption is disabled around the store. */
	preempt_disable();
	inode->i_size = i_size;
	preempt_enable();
#else
	/* All other configurations: plain store. */
	inode->i_size = i_size;
#endif
}

/* Minor number taken from the inode's i_rdev via MINOR(). */
static inline unsigned iminor(const struct inode *inode)
{
	return MINOR(inode->i_rdev);
}

/* Major number taken from the inode's i_rdev via MAJOR(). */
static inline unsigned imajor(const struct inode *inode)
{
	return MAJOR(inode->i_rdev);
}

struct fown_struct {
	rwlock_t lock;          /* protects pid, uid, euid fields */
	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
	enum pid_type pid_type;	/* Kind of process group SIGIO should be sent to */
	kuid_t uid, euid;	/* uid/euid of process setting the owner */
	int signum;		/* posix.1b rt signal to be delivered on IO */
};

/**
 * struct file_ra_state - Track a file's readahead state.
 * @start: Where the most recent readahead started.
 * @size: Number of pages read in the most recent readahead.
 * @async_size: Number of pages that were/are not needed immediately
 *      and so were/are genuinely "ahead".  Start next readahead when
 *      the first of these pages is accessed.
 * @ra_pages: Maximum size of a readahead request, copied from the bdi.
 * @mmap_miss: How many mmap accesses missed in the page cache.
 * @prev_pos: The last byte in the most recent read request.
 *
 * When this structure is passed to ->readahead(), the "most recent"
 * readahead means the current readahead.
 */
struct file_ra_state {
	pgoff_t start;
	unsigned int size;
	unsigned int async_size;
	unsigned int ra_pages;
	unsigned int mmap_miss;
	loff_t prev_pos;
};

/*
 * Check if @index falls in the readahead windows.
*/ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) { return (index >= ra->start && index < ra->start + ra->size); } /* * f_{lock,count,pos_lock} members can be highly contended and share * the same cacheline. f_{lock,mode} are very frequently used together * and so share the same cacheline as well. The read-mostly * f_{path,inode,op} are kept on a separate cacheline. */ struct file { union { struct llist_node f_llist; struct rcu_head f_rcuhead; unsigned int f_iocb_flags; }; /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; fmode_t f_mode; atomic_long_t f_count; struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; u64 f_version; #ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; /* for syncfs */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ unsigned char f_handle[]; }; static inline struct file *get_file(struct file *f) { atomic_long_inc(&f->f_count); return f; } struct file *get_file_rcu(struct file __rcu **f); struct file *get_file_active(struct file **f); #define file_count(x) atomic_long_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. 
*/
/* NOTE: no definition is provided when BITS_PER_LONG is neither 32 nor 64. */
#if BITS_PER_LONG==32
#define MAX_LFS_FILESIZE	((loff_t)ULONG_MAX << PAGE_SHIFT)
#elif BITS_PER_LONG==64
#define MAX_LFS_FILESIZE 	((loff_t)LLONG_MAX)
#endif

/* legacy typedef, should eventually be removed */
typedef void *fl_owner_t;

struct file_lock;

/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
#define OFFSET_MAX	type_max(loff_t)
#define OFFT_OFFSET_MAX	type_max(off_t)
#endif

extern void send_sigio(struct fown_struct *fown, int fd, int band);

/* Returns the inode pointer cached in @f (f->f_inode). */
static inline struct inode *file_inode(const struct file *f)
{
	return f->f_inode;
}

/*
 * Returns whatever d_real() yields for the dentry of @file's path
 * together with @file's inode.
 */
static inline struct dentry *file_dentry(const struct file *file)
{
	return d_real(file->f_path.dentry, file_inode(file));
}

struct fasync_struct {
	rwlock_t		fa_lock;
	int			magic;	/* NOTE(review): presumably FASYNC_MAGIC - confirm */
	int			fa_fd;
	struct fasync_struct	*fa_next; /* singly linked list */
	struct file		*fa_file;
	struct rcu_head		fa_rcu;
};

#define FASYNC_MAGIC 0x4601

/* SMP safe fasync helpers: */
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
extern int fasync_remove_entry(struct file *, struct fasync_struct **);
extern struct fasync_struct *fasync_alloc(void);
extern void fasync_free(struct fasync_struct *);

/* can be called from interrupts */
extern void kill_fasync(struct fasync_struct **, int, int);

extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
extern int f_setown(struct file *filp, int who, int force);
extern void f_delown(struct file *filp);
extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct fown_struct *fown);

/*
 * sb->s_flags.  Note that these mirror the equivalent MS_* flags where
 * represented in both.
*/ #define SB_RDONLY BIT(0) /* Mount read-only */ #define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */ #define SB_NODEV BIT(2) /* Disallow access to device special files */ #define SB_NOEXEC BIT(3) /* Disallow program execution */ #define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */ #define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */ #define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */ #define SB_NOATIME BIT(10) /* Do not update access times. */ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) #define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) #define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) #define SB_ACTIVE BIT(30) #define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ #define SB_ENC_STRICT_MODE_FL (1 << 0) #define sb_has_strict_encoding(sb) \ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) /* * Umount options */ #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ #define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to 
limit user namespace mounts */
#define SB_I_USERNS_VISIBLE		0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
#define SB_I_UNTRUSTED_MOUNTER		0x00000040

#define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
#define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED	0x00000800	/* superblock shouldn't be reused */
#define SB_I_NOUMASK	0x00001000	/* VFS does not apply umask */

/* Possible states of 'frozen' field */
enum {
	SB_UNFROZEN = 0,		/* FS is unfrozen */
	SB_FREEZE_WRITE	= 1,		/* Writes, dir ops, ioctls frozen */
	SB_FREEZE_PAGEFAULT = 2,	/* Page faults stopped as well */
	SB_FREEZE_FS = 3,		/* For internal FS use (e.g. to stop
					 * internal threads if needed) */
	SB_FREEZE_COMPLETE = 4,		/* ->freeze_fs finished successfully */
};

/*
 * Number of rw_sem entries in struct sb_writers: one for each of
 * SB_FREEZE_WRITE, SB_FREEZE_PAGEFAULT and SB_FREEZE_FS.  The __sb_*_write()
 * helpers index the array with "level - 1".
 */
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)

struct sb_writers {
	unsigned short			frozen;		/* Is sb frozen? */
	unsigned short			freeze_holders;	/* Who froze fs? */
	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
};

struct super_block {
	struct list_head	s_list;		/* Keep this first */
	dev_t			s_dev;		/* search index; _not_ kdev_t */
	unsigned char		s_blocksize_bits;
	unsigned long		s_blocksize;
	loff_t			s_maxbytes;	/* Max file size */
	struct file_system_type	*s_type;
	const struct super_operations	*s_op;
	const struct dquot_operations	*dq_op;
	const struct quotactl_ops	*s_qcop;
	const struct export_operations *s_export_op;
	unsigned long		s_flags;
	unsigned long		s_iflags;	/* internal SB_I_* flags */
	unsigned long		s_magic;
	struct dentry		*s_root;
	struct rw_semaphore	s_umount;
	int			s_count;
	atomic_t		s_active;
#ifdef CONFIG_SECURITY
	void                    *s_security;
#endif
	const struct xattr_handler * const *s_xattr;
#ifdef CONFIG_FS_ENCRYPTION
	const struct fscrypt_operations	*s_cop;
	struct fscrypt_keyring	*s_master_keys; /* master crypto keys in use */
#endif
#ifdef CONFIG_FS_VERITY
	const struct fsverity_operations *s_vop;
#endif
#if IS_ENABLED(CONFIG_UNICODE)
	struct unicode_map *s_encoding;
	__u16 s_encoding_flags;
#endif
	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
	struct block_device	*s_bdev;
	struct bdev_handle	*s_bdev_handle;
	struct backing_dev_info *s_bdi;
	struct mtd_info		*s_mtd;
	struct hlist_node	s_instances;
	unsigned int		s_quota_types;	/* Bitmask of supported quota types */
	struct quota_info	s_dquot;	/* Diskquota specific options */

	struct sb_writers	s_writers;

	/*
	 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
	 * s_fsnotify_marks together for cache efficiency. They are frequently
	 * accessed and rarely modified.
	 */
	void			*s_fs_info;	/* Filesystem private info */

	/* Granularity of c/m/atime in ns (cannot be worse than a second) */
	u32			s_time_gran;
	/* Time limits for c/m/atime in seconds */
	time64_t		   s_time_min;
	time64_t		   s_time_max;
#ifdef CONFIG_FSNOTIFY
	__u32			s_fsnotify_mask;
	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
#endif

	char			s_id[32];	/* Informational name */
	uuid_t			s_uuid;		/* UUID */

	unsigned int		s_max_links;

	/*
	 * The next field is for VFS *only*. No filesystems have any business
	 * even looking at it. You have been warned.
	 */
	struct mutex s_vfs_rename_mutex;	/* Kludge */

	/*
	 * Filesystem subtype.  If non-empty the filesystem type field
	 * in /proc/mounts will be "type.subtype"
	 */
	const char *s_subtype;

	const struct dentry_operations *s_d_op; /* default d_op for dentries */

	struct shrinker *s_shrink;	/* per-sb shrinker handle */

	/* Number of inodes with nlink == 0 but still referenced */
	atomic_long_t s_remove_count;

	/*
	 * Number of inode/mount/sb objects that are being watched, note that
	 * inode objects are currently double-accounted.
	 */
	atomic_long_t s_fsnotify_connectors;

	/* Read-only state of the superblock is being changed */
	int s_readonly_remount;

	/* per-sb errseq_t for reporting writeback errors via syncfs */
	errseq_t s_wb_err;

	/* AIO completions deferred from interrupt context */
	struct workqueue_struct *s_dio_done_wq;
	struct hlist_head s_pins;

	/*
	 * Owning user namespace and default context in which to
	 * interpret filesystem uids, gids, quotas, device nodes,
	 * xattrs and security labels.
	 */
	struct user_namespace *s_user_ns;

	/*
	 * The list_lru structure is essentially just a pointer to a table
	 * of per-node lru lists, each of which has its own spinlock.
	 * There is no need to put them into separate cachelines.
	 */
	struct list_lru		s_dentry_lru;
	struct list_lru		s_inode_lru;
	struct rcu_head		rcu;
	struct work_struct	destroy_work;

	struct mutex		s_sync_lock;	/* sync serialisation lock */

	/*
	 * Indicates how deep in a filesystem stack this SB is
	 */
	int s_stack_depth;

	/* s_inode_list_lock protects s_inodes */
	spinlock_t		s_inode_list_lock ____cacheline_aligned_in_smp;
	struct list_head	s_inodes;	/* all inodes */

	spinlock_t		s_inode_wblist_lock;
	struct list_head	s_inodes_wb;	/* writeback inodes */
} __randomize_layout;

/* Return the user namespace of @inode's superblock (i_sb->s_user_ns). */
static inline struct user_namespace *i_user_ns(const struct inode *inode)
{
	return inode->i_sb->s_user_ns;
}

/* Helper functions so that in most cases filesystems will
 * not need to deal directly with kuid_t and kgid_t and can
 * instead deal with the raw numeric values that are stored
 * in the filesystem.
 */

/* Return @inode->i_uid converted by from_kuid() in i_user_ns(@inode). */
static inline uid_t i_uid_read(const struct inode *inode)
{
	return from_kuid(i_user_ns(inode), inode->i_uid);
}

/* Return @inode->i_gid converted by from_kgid() in i_user_ns(@inode). */
static inline gid_t i_gid_read(const struct inode *inode)
{
	return from_kgid(i_user_ns(inode), inode->i_gid);
}

/* Store make_kuid(i_user_ns(@inode), @uid) in @inode->i_uid. */
static inline void i_uid_write(struct inode *inode, uid_t uid)
{
	inode->i_uid = make_kuid(i_user_ns(inode), uid);
}

/* Store make_kgid(i_user_ns(@inode), @gid) in @inode->i_gid. */
static inline void i_gid_write(struct inode *inode, gid_t gid)
{
	inode->i_gid = make_kgid(i_user_ns(inode), gid);
}

/**
 * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping
 * @idmap: idmap of the mount the inode was found from
 * @inode: inode to map
 *
 * Return: the inode's i_uid mapped down according to @idmap.
 * If the inode's i_uid has no mapping INVALID_VFSUID is returned.
 */
static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap,
					 const struct inode *inode)
{
	return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid);
}

/**
 * i_uid_needs_update - check whether inode's i_uid needs to be updated
 * @idmap: idmap of the mount the inode was found from
 * @attr: the new attributes of @inode
 * @inode: the inode to update
 *
 * Check whether the @inode's i_uid field needs to be updated taking idmapped
 * mounts into account if the filesystem supports it.
 *
 * Return: true if @inode's i_uid field needs to be updated, false if not.
 */
static inline bool i_uid_needs_update(struct mnt_idmap *idmap,
				      const struct iattr *attr,
				      const struct inode *inode)
{
	return ((attr->ia_valid & ATTR_UID) &&
		!vfsuid_eq(attr->ia_vfsuid,
			   i_uid_into_vfsuid(idmap, inode)));
}

/**
 * i_uid_update - update @inode's i_uid field
 * @idmap: idmap of the mount the inode was found from
 * @attr: the new attributes of @inode
 * @inode: the inode to update
 *
 * Safely update @inode's i_uid field translating the vfsuid of any idmapped
 * mount into the filesystem kuid.
 */
static inline void i_uid_update(struct mnt_idmap *idmap,
				const struct iattr *attr,
				struct inode *inode)
{
	/* Nothing is written unless ATTR_UID is set in @attr->ia_valid. */
	if (attr->ia_valid & ATTR_UID)
		inode->i_uid = from_vfsuid(idmap, i_user_ns(inode),
					   attr->ia_vfsuid);
}

/**
 * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping
 * @idmap: idmap of the mount the inode was found from
 * @inode: inode to map
 *
 * Return: the inode's i_gid mapped down according to @idmap.
 * If the inode's i_gid has no mapping INVALID_VFSGID is returned.
*/ static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid); } /** * i_gid_needs_update - check whether inode's i_gid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the $inode's i_gid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_gid field needs to be updated, false if not. */ static inline bool i_gid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_GID) && !vfsgid_eq(attr->ia_vfsgid, i_gid_into_vfsgid(idmap, inode))); } /** * i_gid_update - update @inode's i_gid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_gid field translating the vfsgid of any idmapped * mount into the filesystem kgid. */ static inline void i_gid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_GID) inode->i_gid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); } /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_uid field of @inode. If the inode was found/created via * an idmapped mount map the caller's fsuid according to @idmap. */ static inline void inode_fsuid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } /** * inode_fsgid_set - initialize inode's i_gid field with callers fsgid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_gid field of @inode. 
If the inode was found/created via * an idmapped mount map the caller's fsgid according to @idmap. */ static inline void inode_fsgid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); } /** * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped * @sb: the superblock we want a mapping in * @idmap: idmap of the relevant mount * * Check whether the caller's fsuid and fsgid have a valid mapping in the * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map * the caller's fsuid and fsgid according to the @idmap first. * * Return: true if fsuid and fsgid is mapped, false if not. */ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct mnt_idmap *idmap) { struct user_namespace *fs_userns = sb->s_user_ns; kuid_t kuid; kgid_t kgid; kuid = mapped_fsuid(idmap, fs_userns); if (!uid_valid(kuid)) return false; kgid = mapped_fsgid(idmap, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && kgid_has_mapping(fs_userns, kgid); } struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { return inode->__i_atime.tv_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { return inode->__i_atime.tv_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { return inode->__i_atime; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { inode->__i_atime = ts; return ts; } static inline struct timespec64 inode_set_atime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { return inode->__i_mtime.tv_sec; } static inline long inode_get_mtime_nsec(const struct 
inode *inode) { return inode->__i_mtime.tv_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { return inode->__i_mtime; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { inode->__i_mtime = ts; return ts; } static inline struct timespec64 inode_set_mtime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_mtime_to_ts(inode, ts); } static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->__i_ctime.tv_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { return inode->__i_ctime.tv_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { return inode->__i_ctime; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { inode->__i_ctime = ts; return ts; } /** * inode_set_ctime - set the ctime in the inode * @inode: inode in which to set the ctime * @sec: tv_sec value to set * @nsec: tv_nsec value to set * * Set the ctime in @inode to { @sec, @nsec } */ static inline struct timespec64 inode_set_ctime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_ctime_to_ts(inode, ts); } struct timespec64 simple_inode_init_ts(struct inode *inode); /* * Snapshotting support. */ /* * These are internal functions, please use sb_start_{write,pagefault,intwrite} * instead. 
*/ static inline void __sb_end_write(struct super_block *sb, int level) { percpu_up_read(sb->s_writers.rw_sem + level-1); } static inline void __sb_start_write(struct super_block *sb, int level) { percpu_down_read(sb->s_writers.rw_sem + level - 1); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) { return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); } #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) static inline bool sb_write_started(const struct super_block *sb) { return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1); } /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to * * Decrement number of writers to the filesystem. Wake up possible waiters * wanting to freeze the filesystem. */ static inline void sb_end_write(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_WRITE); } /** * sb_end_pagefault - drop write access to a superblock from a page fault * @sb: the super we wrote to * * Decrement number of processes handling write page fault to the filesystem. * Wake up possible waiters wanting to freeze the filesystem. */ static inline void sb_end_pagefault(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_end_intwrite - drop write access to a superblock for internal fs purposes * @sb: the super we wrote to * * Decrement fs-internal number of writers to the filesystem. Wake up possible * waiters wanting to freeze the filesystem. */ static inline void sb_end_intwrite(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_FS); } /** * sb_start_write - get write access to a superblock * @sb: the super we write to * * When a process wants to write data or metadata to a file system (i.e. 
dirty
 * a page or an inode), it should embed the operation in a sb_start_write() -
 * sb_end_write() pair to get exclusion against file system freezing. This
 * function increments number of writers preventing freezing. If the file
 * system is already frozen, the function waits until the file system is
 * thawed.
 *
 * Since freeze protection behaves as a lock, users have to preserve
 * ordering of freeze protection and other filesystem locks. Generally,
 * freeze protection should be the outermost lock. In particular, we have:
 *
 * sb_start_write
 *   -> i_mutex			(write path, truncate, directory ops, ...)
 *   -> s_umount		(freeze_super, thaw_super)
 */
static inline void sb_start_write(struct super_block *sb)
{
	__sb_start_write(sb, SB_FREEZE_WRITE);
}

/*
 * Trylock variant of sb_start_write(): returns the result of
 * __sb_start_write_trylock() for the SB_FREEZE_WRITE level.
 */
static inline bool sb_start_write_trylock(struct super_block *sb)
{
	return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
}

/**
 * sb_start_pagefault - get write access to a superblock from a page fault
 * @sb: the super we write to
 *
 * When a process starts handling write page fault, it should embed the
 * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
 * exclusion against file system freezing. This is needed since the page fault
 * is going to dirty a page. This function increments number of running page
 * faults preventing freezing. If the file system is already frozen, the
 * function waits until the file system is thawed.
 *
 * Since page fault freeze protection behaves as a lock, users have to preserve
 * ordering of freeze protection and other filesystem locks. It is advised to
 * put sb_start_pagefault() close to mmap_lock in lock ordering.
Page fault
 * handling code implies lock dependency:
 *
 * mmap_lock
 *   -> sb_start_pagefault
 */
static inline void sb_start_pagefault(struct super_block *sb)
{
	__sb_start_write(sb, SB_FREEZE_PAGEFAULT);
}

/**
 * sb_start_intwrite - get write access to a superblock for internal fs purposes
 * @sb: the super we write to
 *
 * This is the third level of protection against filesystem freezing. It is
 * free for use by a filesystem. The only requirement is that it must rank
 * below sb_start_pagefault.
 *
 * For example filesystem can call sb_start_intwrite() when starting a
 * transaction which somewhat eases handling of freezing for internal sources
 * of filesystem changes (internal fs threads, discarding preallocation on file
 * close, etc.).
 */
static inline void sb_start_intwrite(struct super_block *sb)
{
	__sb_start_write(sb, SB_FREEZE_FS);
}

/*
 * Trylock variant of sb_start_intwrite(): returns the result of
 * __sb_start_write_trylock() for the SB_FREEZE_FS level.
 */
static inline bool sb_start_intwrite_trylock(struct super_block *sb)
{
	return __sb_start_write_trylock(sb, SB_FREEZE_FS);
}

bool inode_owner_or_capable(struct mnt_idmap *idmap,
			    const struct inode *inode);

/*
 * VFS helper functions..
*/
int vfs_create(struct mnt_idmap *, struct inode *,
	       struct dentry *, umode_t, bool);
int vfs_mkdir(struct mnt_idmap *, struct inode *,
	      struct dentry *, umode_t);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
              umode_t, dev_t);
int vfs_symlink(struct mnt_idmap *, struct inode *,
		struct dentry *, const char *);
int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
	     struct dentry *, struct inode **);
int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *);
int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
	       struct inode **);

/**
 * struct renamedata - contains all information required for renaming
 * @old_mnt_idmap:     idmap of the old mount the inode was found from
 * @old_dir:           parent of source
 * @old_dentry:        source
 * @new_mnt_idmap:     idmap of the new mount the inode was found from
 * @new_dir:           parent of destination
 * @new_dentry:        destination
 * @delegated_inode:   returns an inode needing a delegation break
 * @flags:             rename flags
 */
struct renamedata {
	struct mnt_idmap *old_mnt_idmap;
	struct inode *old_dir;
	struct dentry *old_dentry;
	struct mnt_idmap *new_mnt_idmap;
	struct inode *new_dir;
	struct dentry *new_dentry;
	struct inode **delegated_inode;
	unsigned int flags;
} __randomize_layout;

/* Rename using the source/destination described by a struct renamedata. */
int vfs_rename(struct renamedata *);

/*
 * Create a whiteout entry for @dentry in @dir: a vfs_mknod() call with mode
 * S_IFCHR | WHITEOUT_MODE and device number WHITEOUT_DEV.  Returns whatever
 * vfs_mknod() returns.
 */
static inline int vfs_whiteout(struct mnt_idmap *idmap,
			       struct inode *dir, struct dentry *dentry)
{
	return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
			 WHITEOUT_DEV);
}

struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
				 const struct path *parentpath,
				 umode_t mode, int open_flag,
				 const struct cred *cred);
struct file *kernel_file_open(const struct path *path, int flags,
			      struct inode *inode, const struct cred *cred);

int vfs_mkobj(struct dentry *, umode_t,
		int (*f)(struct dentry *, umode_t, void *),
		void *);

int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);

/*
 * compat_ptr_ioctl is a real function only with CONFIG_COMPAT; otherwise the
 * name expands to NULL.
 */
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
					unsigned long arg);
#else
#define compat_ptr_ioctl NULL
#endif

/*
 * VFS file helper functions.
 */
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
		      const struct inode *dir, umode_t mode);
extern bool may_open_dev(const struct path *path);
umode_t mode_strip_sgid(struct mnt_idmap *idmap,
			const struct inode *dir, umode_t mode);

/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 * Return 'true' to keep going and 'false' if there are no more entries.
 */
struct dir_context;
typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
			 unsigned);

/*
 * Context handed to a filldir_t callback.
 * NOTE(review): 'pos' is presumably the current directory position - confirm
 * against the readdir implementations.
 */
struct dir_context {
	filldir_t actor;	/* callback of type filldir_t */
	loff_t pos;
};

/*
 * These flags let !MMU mmap() govern direct device mapping vs immediate
 * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
 *
 * NOMMU_MAP_COPY:	Copy can be mapped (MAP_PRIVATE)
 * NOMMU_MAP_DIRECT:	Can be mapped directly (MAP_SHARED)
 * NOMMU_MAP_READ:	Can be mapped for reading
 * NOMMU_MAP_WRITE:	Can be mapped for writing
 * NOMMU_MAP_EXEC:	Can be mapped for execution
 */
#define NOMMU_MAP_COPY		0x00000001
#define NOMMU_MAP_DIRECT	0x00000008
#define NOMMU_MAP_READ		VM_MAYREAD
#define NOMMU_MAP_WRITE		VM_MAYWRITE
#define NOMMU_MAP_EXEC		VM_MAYEXEC

/* Union of the three NOMMU_MAP_* values that alias VM_MAY* flags. */
#define NOMMU_VMFLAGS \
	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)

/*
 * These flags control the behavior of the remap_file_range function pointer.
 * If it is called with len == 0 that means "remap to end of source file".
 * See Documentation/filesystems/vfs.rst for more details about this call.
 *
 * REMAP_FILE_DEDUP: only remap if contents identical (i.e.
deduplicate)
 * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
 */
#define REMAP_FILE_DEDUP		(1 << 0)
#define REMAP_FILE_CAN_SHORTEN		(1 << 1)

/*
 * These flags signal that the caller is ok with altering various aspects of
 * the behavior of the remap operation.  The changes must be made by the
 * implementation; the vfs remap helper functions can take advantage of them.
 * Flags in this category exist to preserve the quirky behavior of the hoisted
 * btrfs clone/dedupe ioctls.
 */
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)

/*
 * These flags control the behavior of vfs_copy_file_range().
 * They are not available to the user via syscall.
 *
 * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
 */
#define COPY_FILE_SPLICE		(1 << 0)

struct iov_iter;
struct io_uring_cmd;
struct offset_ctx;

/*
 * Table of per-file operation callbacks, reached through file->f_op (see
 * call_read_iter(), call_write_iter() and call_mmap() in this file).
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
			unsigned int flags);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	unsigned long mmap_supported_flags;
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	void (*splice_eof)(struct file *file);
	int (*setlease)(struct file *, int, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
			loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
				unsigned int poll_flags);
} __randomize_layout;

/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
			    int (*) (struct file *, struct dir_context *));
/*
 * WRAP_DIR_ITER(x) defines "static int shared_<x>(file, ctx)", which forwards
 * to wrap_directory_iterator(file, ctx, x).
 */
#define WRAP_DIR_ITER(x) \
	static int shared_##x(struct file *file , struct dir_context *ctx) \
	{ return wrap_directory_iterator(file, ctx, x); }

/* Table of per-inode operation callbacks. */
struct inode_operations {
	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
	int (*permission) (struct mnt_idmap *, struct inode *, int);
	struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);

	int (*readlink) (struct dentry *, char __user *,int);

	int (*create) (struct mnt_idmap *, struct inode *,struct dentry *,
		       umode_t, bool);
	int (*link) (struct dentry *,struct inode *,struct dentry *);
	int (*unlink) (struct inode *,struct dentry *);
	int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
			const char *);
	int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
		      umode_t);
	int (*rmdir) (struct inode *,struct dentry *);
	int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
		      umode_t,dev_t);
	int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
			struct inode *, struct dentry *, unsigned int);
	int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
	int (*getattr) (struct mnt_idmap *, const struct path *,
			struct kstat *, u32, unsigned int);
	ssize_t (*listxattr) (struct dentry *, char *, size_t);
	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
		      u64 len);
	int (*update_time)(struct inode *, int);
	int (*atomic_open)(struct inode *, struct dentry *,
			   struct file *, unsigned open_flag,
			   umode_t create_mode);
	int (*tmpfile) (struct mnt_idmap *, struct inode *,
			struct file *, umode_t);
	struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *,
				     int);
	int (*set_acl)(struct mnt_idmap *, struct dentry *,
		       struct posix_acl *, int);
	int (*fileattr_set)(struct mnt_idmap *idmap,
			    struct dentry *dentry, struct fileattr *fa);
	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;

/*
 * Call @file->f_op->read_iter(@kio, @iter) and return its result.  The
 * function pointer is not checked for NULL here.
 */
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
				     struct iov_iter *iter)
{
	return file->f_op->read_iter(kio, iter);
}

/*
 * Call @file->f_op->write_iter(@kio, @iter) and return its result.  The
 * function pointer is not checked for NULL here.
 */
static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
				      struct iov_iter *iter)
{
	return file->f_op->write_iter(kio, iter);
}

/*
 * Call @file->f_op->mmap(@file, @vma) and return its result.  The function
 * pointer is not checked for NULL here.
 */
static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
{
	return file->f_op->mmap(file, vma);
}

extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
				   loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
				       struct file *file_out, loff_t pos_out,
				       size_t len, unsigned int flags);
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				    struct file *file_out, loff_t pos_out,
				    loff_t *len, unsigned int remap_flags,
				    const struct iomap_ops *dax_read_ops);
int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				  struct file *file_out, loff_t pos_out,
				  loff_t *count, unsigned int remap_flags);
extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
				  struct file *file_out, loff_t pos_out,
				  loff_t len, unsigned int remap_flags);
extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
extern int vfs_dedupe_file_range(struct file *file,
				 struct file_dedupe_range *same);
extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
					struct file *dst_file, loff_t dst_pos,
					loff_t len, unsigned int remap_flags);

/*
 * Single-bit values naming a freeze holder; this is the type of the 'who'
 * argument of ->freeze_super() and ->thaw_super() in struct super_operations.
 */
enum freeze_holder {
	FREEZE_HOLDER_KERNEL	= (1U << 0),
	FREEZE_HOLDER_USERSPACE	= (1U << 1),
};

/* Table of per-superblock operation callbacks (super_block.s_op). */
struct super_operations {
   	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);
	void (*free_inode)(struct inode *);

   	void (*dirty_inode) (struct inode *, int flags);
	int (*write_inode) (struct inode *, struct writeback_control *wbc);
	int (*drop_inode) (struct inode *);
	void (*evict_inode) (struct inode *);
	void (*put_super) (struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	int (*freeze_super) (struct super_block *, enum freeze_holder who);
	int (*freeze_fs) (struct super_block *);
	int (*thaw_super) (struct super_block *, enum freeze_holder who);
	int (*unfreeze_fs) (struct super_block *);
	int (*statfs) (struct dentry *, struct kstatfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*umount_begin) (struct super_block *);

	int (*show_options)(struct seq_file *, struct dentry *);
	int (*show_devname)(struct seq_file *, struct dentry *);
	int (*show_path)(struct seq_file *, struct dentry *);
	int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
	struct dquot **(*get_dquots)(struct inode *);
#endif
	long (*nr_cached_objects)(struct super_block *,
				  struct shrink_control *);
	long (*free_cached_objects)(struct super_block *,
				    struct shrink_control *);
	void (*shutdown)(struct super_block *sb);
};

/*
 * Inode flags - they have no relation to superblock flags now
 */
#define S_SYNC		(1 << 0)  /* Writes are synced at once */
#define S_NOATIME	(1 << 1)  /* Do not update access times */
#define S_APPEND	(1 << 2)  /* Append-only file */
#define S_IMMUTABLE	(1 << 3)  /* Immutable file */
#define S_DEAD		(1 << 4)  /* removed, but still open directory */
#define S_NOQUOTA	(1 << 5)  /* Inode is not counted to quota */
#define S_DIRSYNC	(1 << 6)  /* Directory modifications are synchronous */
#define S_NOCMTIME	(1 << 7)  /* Do not update file c/mtime */
#define S_SWAPFILE	(1 << 8)  /* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	(1 << 9)  /* Inode is fs-internal */
#define S_IMA		(1 << 10) /* Inode has an associated IMA struct */
#define S_AUTOMOUNT	(1 << 11) /* Automount/referral quasi-directory */
#define S_NOSEC		(1 << 12) /* no suid or xattr security attributes */
#ifdef CONFIG_FS_DAX
#define S_DAX		(1 << 13) /* Direct Access, avoiding the page cache */
#else
#define S_DAX		0	  /* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD	(1 << 15) /* Casefolded file */
#define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */

/*
 * Note that nosuid etc flags are inode-specific: setting some file-system
 * flags just means all the inodes inherit those flags by default.
It might be * possible to override it selectively if you really wanted to with some * ioctl() that is not currently implemented. * * Exception: SB_RDONLY is always applied to the entire file system. * * Unfortunately, it is possible to change a filesystems flags with it mounted * with files in use. This means that all of the inodes will not have their * i_flags updated. Hence, i_flags no longer inherit the superblock mount * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, SB_MANDLOCK) #define IS_NOATIME(inode) __IS_FLG(inode, SB_RDONLY|SB_NOATIME) #define IS_I_VERSION(inode) __IS_FLG(inode, SB_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) #ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) #else #define IS_POSIXACL(inode) 0 #endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) #define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD) #define IS_VERITY(inode) ((inode)->i_flags & 
S_VERITY) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) { return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)); } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = kiocb_src->ki_flags, .ki_ioprio = kiocb_src->ki_ioprio, .ki_pos = kiocb_src->ki_pos, }; } /* * Inode state bits. Protected by inode->i_lock * * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. * * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at * various stages of removing an inode. * * Two bits are used for locking and completion notification, I_NEW and I_SYNC. * * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on * fdatasync() (unless I_DIRTY_DATASYNC is also set). * Timestamp updates are the usual cause. * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of * these changes separately from I_DIRTY_SYNC so that we * don't have to write inode on fdatasync() when only * e.g. the timestamps have changed. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. * I_DIRTY_TIME The inode itself has dirty timestamps, and the * lazytime mount option is enabled. We keep track of this * separately from I_DIRTY_SYNC in order to implement * lazytime. This gets cleared if I_DIRTY_INODE * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. 
But
 *			I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
 *			in place because writeback might already be in progress
 *			and we don't want to lose the time update
 * I_NEW		Serves as both a mutex and completion notification.
 *			New inodes set I_NEW. If two processes both create
 *			the same inode, one of them will release its inode and
 *			wait for I_NEW to be released before returning.
 *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
 *			also cause waiting on I_NEW, without I_NEW actually
 *			being set. find_inode() uses this to prevent returning
 *			nearly-dead inodes.
 * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
 *			is zero. I_FREEING must be set when I_WILL_FREE is
 *			cleared.
 * I_FREEING		Set when inode is about to be freed but still has dirty
 *			pages or buffers attached or the inode itself is still
 *			dirty.
 * I_CLEAR		Added by clear_inode(). In this state the inode is
 *			clean and can be destroyed. Inode keeps I_FREEING.
 *
 *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
 *			prohibited for many purposes. iget() must wait for
 *			the inode to be completely released, then create it
 *			anew. Other functions will just ignore such inodes,
 *			if appropriate. I_NEW is used for waiting.
 *
 * I_SYNC		Writeback of inode is running. The bit is set during
 *			data writeback, and cleared with a wakeup on the bit
 *			address once it is done. The bit is also used to pin
 *			the inode in memory for flusher thread.
 *
 * I_REFERENCED		Marks the inode as recently referenced on the LRU list.
 *
 * I_DIO_WAKEUP		Never set. Only used as a key for wait_on_bit().
 *
 * I_WB_SWITCH		Cgroup bdi_writeback switching in progress. Used to
 *			synchronize competing switching instances and to tell
 *			wb stat updates to grab the i_pages lock. See
 *			inode_switch_wbs_work_fn() for details.
 *
 * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
 *			and work dirs among overlayfs mounts.
 *
 * I_CREATING		New object's inode in the middle of setting up.
*
 * I_DONTCACHE		Evict inode as soon as it is not used anymore.
 *
 * I_SYNC_QUEUED	Inode is queued in b_io or b_more_io writeback lists.
 *			Used to detect that mark_inode_dirty() should not move
 *			inode between dirty lists.
 *
 * I_PINNING_FSCACHE_WB	Inode is pinning an fscache object for writeback.
 *
 * Q: What is the difference between I_WILL_FREE and I_FREEING?
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
/* The __I_* names are bit numbers; the matching I_* names are the masks. */
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)
#define __I_DIO_WAKEUP		9
#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
#define I_LINKABLE		(1 << 10)
#define I_DIRTY_TIME		(1 << 11)
/* Bit 12 has no definition in this list. */
#define I_WB_SWITCH		(1 << 13)
#define I_OVL_INUSE		(1 << 14)
#define I_CREATING		(1 << 15)
#define I_DONTCACHE		(1 << 16)
#define I_SYNC_QUEUED		(1 << 17)
#define I_PINNING_FSCACHE_WB	(1 << 18)

/* Composite masks built from the dirty bits above. */
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)

extern void __mark_inode_dirty(struct inode *, int);

/*
 * Mark @inode dirty with the full I_DIRTY mask
 * (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES).
 */
static inline void mark_inode_dirty(struct inode *inode)
{
	__mark_inode_dirty(inode, I_DIRTY);
}

/* Mark @inode dirty with I_DIRTY_SYNC only. */
static inline void mark_inode_dirty_sync(struct inode *inode)
{
	__mark_inode_dirty(inode, I_DIRTY_SYNC);
}

/*
 * Returns true if the given inode itself only has dirty timestamps (its pages
 * may still be dirty) and isn't currently being allocated or freed.
 * Filesystems should call this if, when writing an inode with lazytime
 * enabled, they want to opportunistically write the timestamps of other inodes
 * located very nearby on-disk, e.g. in the same inode block. This returns true
 * if the given inode is in need of such an opportunistic update. Requires
 * i_lock, or at least later re-checking under i_lock.
*/ static inline bool inode_is_dirtytime_only(struct inode *inode) { return (inode->i_state & (I_DIRTY_TIME | I_NEW | I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME; } extern void inc_nlink(struct inode *inode); extern void drop_nlink(struct inode *inode); extern void clear_nlink(struct inode *inode); extern void set_nlink(struct inode *inode, unsigned int nlink); static inline void inode_inc_link_count(struct inode *inode) { inc_nlink(inode); mark_inode_dirty(inode); } static inline void inode_dec_link_count(struct inode *inode) { drop_nlink(inode); mark_inode_dirty(inode); } enum file_time_flags { S_ATIME = 1, S_MTIME = 2, S_CTIME = 4, S_VERSION = 8, }; extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) touch_atime(&file->f_path); } extern int file_modified(struct file *file); int kiocb_modified(struct kiocb *iocb); int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { const char *name; int fs_flags; #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; struct hlist_head fs_supers; struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; struct lock_class_key s_vfs_rename_key; struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; struct lock_class_key i_lock_key; struct lock_class_key i_mutex_key; struct lock_class_key invalidate_lock_key; struct lock_class_key i_mutex_dir_key; }; #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int set_anon_super_fc(struct super_block *s, struct fs_context *fc); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context *)); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int 
(*set)(struct super_block *,void *), int flags, void *data); struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) #define fops_put(fops) \ do { if (fops) module_put((fops)->owner); } while(0) /* * This one is to be used *ONLY* from ->open() instances. * fops must be non-NULL, pinned down *and* module dependencies * should be sufficient to pin the caller down as well. */ #define replace_fops(f, fops) \ do { \ struct file *__file = (f); \ fops_put(__file->f_op); \ BUG_ON(!(__file->f_op = (fops))); \ } while(0) extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); int freeze_super(struct super_block *super, enum freeze_holder who); int thaw_super(struct super_block *super, enum freeze_holder who); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); int inode_update_timestamps(struct inode *inode, int flags); int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_MASK) /* fs/open.c */ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ atomic_t refcnt; struct audit_names *aname; const char iname[]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } /** * is_idmapped_mnt - check whether 
a mount is mapped
 * @mnt: the mount to check
 *
 * If @mnt has an idmap other than @nop_mnt_idmap attached to it, then @mnt
 * is mapped.
 *
 * Return: true if mount is mapped, false if not.
 */
static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
{
	return mnt_idmap(mnt) != &nop_mnt_idmap;
}

extern long vfs_truncate(const struct path *, loff_t);
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
		unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
			loff_t len);
extern long do_sys_open(int dfd, const char __user *filename, int flags,
			umode_t mode);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(const struct path *,
				   const char *, int, umode_t);

/*
 * Open @name relative to the root of @mnt: a temporary struct path made of
 * @mnt and mnt->mnt_root is handed to file_open_root() together with @flags
 * and @mode, and its result is returned unchanged.
 */
static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
				   const char *name, int flags, umode_t mode)
{
	return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
			      name, flags, mode);
}
struct file *dentry_open(const struct path *path, int flags,
			 const struct cred *creds);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
			   const struct cred *cred);
struct file *backing_file_open(const struct path *user_path, int flags,
			       const struct path *real_path,
			       const struct cred *cred);
struct path *backing_file_user_path(struct file *f);

/*
 * file_user_path - get the path to display for memory mapped file
 *
 * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
 * stored in ->vm_file is a backing file whose f_inode is on the underlying
 * filesystem. When the mapped file path is displayed to user (e.g. via
 * /proc/<pid>/maps), this helper should be used to get the path to display
 * to the user, which is the path of the fd that user has requested to map.
*/ static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return backing_file_user_path(f); return &f->f_path; } static inline struct file *file_clone_open(struct file *file) { return dentry_open(&file->f_path, file->f_flags, file->f_cred); } extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int, int *); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); /* Helper for the simple case when original dentry is used */ static inline int finish_open_simple(struct file *file, int error) { if (error) return error; return finish_open(file, file->f_path.dentry, NULL); } /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(void); extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) extern struct super_block *blockdev_superblock; static inline bool sb_is_blkdev_sb(struct super_block *sb) { return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 /* Marks the bottom of the first segment of free char majors */ #define CHRDEV_MAJOR_DYN_END 234 /* Marks the top and bottom of the second segment of free char majors */ #define CHRDEV_MAJOR_DYN_EXT_START 511 #define CHRDEV_MAJOR_DYN_EXT_END 384 extern int alloc_chrdev_region(dev_t *, 
unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops); extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); extern void chrdev_show(struct seq_file *,off_t); static inline int register_chrdev(unsigned int major, const char *name, const struct file_operations *fops) { return __register_chrdev(major, 0, 256, name, fops); } static inline void unregister_chrdev(unsigned int major, const char *name) { __unregister_chrdev(major, 0, 256, name); } extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); static inline int file_write_and_wait(struct file *file) { return file_write_and_wait_range(file, 0, LLONG_MAX); } extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); static inline bool iocb_is_dsync(const struct kiocb *iocb) { return (iocb->ki_flags & IOCB_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host); } /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount * of bytes passed in, or an error if syncing the file failed. 
*/
static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
{
	if (iocb_is_dsync(iocb)) {
		/*
		 * Sync the range [ki_pos - count, ki_pos - 1], i.e. the bytes
		 * that precede the current position.  The final (datasync)
		 * argument is 0 only when IOCB_SYNC is set, 1 otherwise.
		 */
		int ret = vfs_fsync_range(iocb->ki_filp,
				iocb->ki_pos - count, iocb->ki_pos - 1,
				(iocb->ki_flags & IOCB_SYNC) ? 0 : 1);
		if (ret)
			return ret;
	}

	return count;
}

extern void emergency_sync(void);
extern void emergency_remount(void);

#ifdef CONFIG_BLOCK
extern int bmap(struct inode *inode, sector_t *block);
#else
/* Stub used when CONFIG_BLOCK is not defined: always fails with -EINVAL. */
static inline int bmap(struct inode *inode, sector_t *block)
{
	return -EINVAL;
}
#endif

int notify_change(struct mnt_idmap *, struct dentry *,
		  struct iattr *, struct inode **);
int inode_permission(struct mnt_idmap *, struct inode *, int);
int generic_permission(struct mnt_idmap *, struct inode *, int);

/*
 * inode_permission() on the inode behind @file, using the idmap returned by
 * file_mnt_idmap(@file).
 */
static inline int file_permission(struct file *file, int mask)
{
	return inode_permission(file_mnt_idmap(file),
				file_inode(file), mask);
}

/*
 * inode_permission() on the inode of path->dentry, using the idmap of
 * path->mnt.
 */
static inline int path_permission(const struct path *path, int mask)
{
	return inode_permission(mnt_idmap(path->mnt),
				d_inode(path->dentry), mask);
}
int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
		   struct inode *inode);

/*
 * True if inode->i_mode has any bit of S_IXUGO set, or if the inode is a
 * directory.
 */
static inline bool execute_ok(struct inode *inode)
{
	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
}

/*
 * True if the S_IFMT (file type) bits of inode->i_mode differ from those of
 * @mode; bits outside S_IFMT are ignored.
 */
static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
{
	return (inode->i_mode ^ mode) & S_IFMT;
}

/**
 * file_start_write - get write access to a superblock for regular file io
 * @file: the file we want to write to
 *
 * This is a variant of sb_start_write() which is a noop on non-regular files.
 * Should be matched with a call to file_end_write().
*/ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_start_write_trylock(file_inode(file)->i_sb); } /** * file_end_write - drop write access to a superblock of a regular file * @file: the file we wrote to * * Should be matched with a call to file_start_write(). */ static inline void file_end_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; sb_end_write(file_inode(file)->i_sb); } /** * kiocb_start_write - get write access to a superblock for async file io * @iocb: the io context we want to submit the write with * * This is a variant of sb_start_write() for async io submission. * Should be matched with a call to kiocb_end_write(). */ static inline void kiocb_start_write(struct kiocb *iocb) { struct inode *inode = file_inode(iocb->ki_filp); sb_start_write(inode->i_sb); /* * Fool lockdep by telling it the lock got released so that it * doesn't complain about the held lock when we return to userspace. */ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); } /** * kiocb_end_write - drop write access to a superblock after async file io * @iocb: the io context we sumbitted the write with * * Should be matched with a call to kiocb_start_write(). */ static inline void kiocb_end_write(struct kiocb *iocb) { struct inode *inode = file_inode(iocb->ki_filp); /* * Tell lockdep we inherited freeze protection from submission thread. */ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); sb_end_write(inode->i_sb); } /* * This is used for regular files where some users -- especially the * currently executed binary in a process, previously handled via * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap * read-write shared) accesses. * * get_write_access() gets write permission for a file. 
* put_write_access() releases this write permission. * deny_write_access() denies write access to a file. * allow_write_access() re-enables write access to a file. * * The i_writecount field of an inode can have the following values: * 0: no write access, no denied write access * < 0: (-i_writecount) users that denied write access to the file. * > 0: (i_writecount) users that have write access to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if sign is wrong. */ static inline int get_write_access(struct inode *inode) { return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline int deny_write_access(struct file *file) { struct inode *inode = file_inode(file); return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline void put_write_access(struct inode * inode) { atomic_dec(&inode->i_writecount); } static inline void allow_write_access(struct file *file) { if (file) atomic_inc(&file_inode(file)->i_writecount); } static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; } #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { BUG_ON(atomic_dec_return(&inode->i_readcount) < 0); } static inline void i_readcount_inc(struct inode *inode) { atomic_inc(&inode->i_readcount); } #else static inline void i_readcount_dec(struct inode *inode) { return; } static inline void i_readcount_inc(struct inode *inode) { return; } #endif extern int do_pipe_flags(int *, int); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern 
ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern bool is_subdir(struct dentry *, struct dentry *); extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); #include <linux/err.h> /* needed for stackable file system support */ extern loff_t default_llseek(struct file *file, loff_t offset, int whence); extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, int (*match)(struct inode *, unsigned long, void *), void *data); extern struct inode 
*find_inode_rcu(struct super_block *, unsigned long, int (*)(struct inode *, void *), void *); extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void lockdep_annotate_inode_mutex_key(struct inode *inode); #else static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); void dump_mapping(const struct address_space *); /* * Userspace may rely on the inode number being non-zero. For example, glibc * simply ignores files with zero i_ino in unlink() and other places. * * As an additional complication, if userspace was compiled with * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the * lower 32 bits, so we need to check that those aren't zero explicitly. With * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but * better safe than sorry. */ static inline bool is_zero_ino(ino_t ino) { return (u32)ino == 0; } extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); /* * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. 
*/ static inline void * alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp) { return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp); } extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) __remove_inode_hash(inode); } extern void inode_sb_list_add(struct inode *inode); extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to, ssize_t already_read); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, ssize_t direct_written, ssize_t buffered_written); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iocb_iter_read(struct 
file *file, struct kiocb *iocb, struct iov_iter *iter); ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); /* fs/splice.c */ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); ssize_t copy_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len, unsigned int flags); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); #define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); int rw_verify_area(int, struct file *, const loff_t *, size_t); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { /* need locking between buffered and direct access */ DIO_LOCKING = 0x01, /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, }; ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t 
get_block, dio_iodone_t end_io,
			     int flags);

/*
 * Convenience wrapper around __blockdev_direct_IO(): uses the block device of
 * the inode's superblock (inode->i_sb->s_bdev), passes NULL for end_io and
 * DIO_LOCKING | DIO_SKIP_HOLES for flags.
 */
static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
					 struct inode *inode,
					 struct iov_iter *iter,
					 get_block_t get_block)
{
	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
			get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
}
#endif

void inode_dio_wait(struct inode *inode);

/**
 * inode_dio_begin - signal start of a direct I/O request
 * @inode: inode the direct I/O happens on
 *
 * This increments the inode's i_dio_count. It is the counterpart of
 * inode_dio_end(), which decrements the count again and wakes up waiters
 * once it drops to zero.
 */
static inline void inode_dio_begin(struct inode *inode)
{
	atomic_inc(&inode->i_dio_count);
}

/**
 * inode_dio_end - signal finish of a direct I/O request
 * @inode: inode the direct I/O happens on
 *
 * This is called once we've finished processing a direct I/O request,
 * and is used to wake up callers waiting for direct I/O to be quiesced.
 */
static inline void inode_dio_end(struct inode *inode)
{
	/* Only the decrement that reaches zero wakes the __I_DIO_WAKEUP bit. */
	if (atomic_dec_and_test(&inode->i_dio_count))
		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
}

extern void inode_set_flags(struct inode *inode, unsigned int flags,
			    unsigned int mask);

extern const struct file_operations generic_ro_fops;

/*
 * True for char device, block device, FIFO and socket modes.
 * Note: @m is expanded once per test, so avoid arguments with side effects.
 */
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))

extern int readlink_copy(char __user *, int, const char *);
extern int page_readlink(struct dentry *, char __user *, int);
extern const char *page_get_link(struct dentry *, struct inode *,
				 struct delayed_call *);
extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct
path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); static inline loff_t __inode_get_bytes(struct inode *inode) { return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; } loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); const char *simple_get_link(struct dentry *, struct inode *, struct delayed_call *); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags); int vfs_fstat(int fd, struct kstat *stat); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, filename, stat, 0); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); } extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int 
dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern int simple_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); extern int simple_nosetlease(struct file *, int, struct file_lock **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void 
make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { struct xarray xa; u32 next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); void simple_offset_destroy(struct offset_ctx *octx); extern const struct file_operations simple_offset_dir_operations; extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); void setattr_copy(struct mnt_idmap *, struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); static inline bool vma_is_dax(const struct vm_area_struct *vma) { return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } static 
inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; inode = file_inode(vma->vm_file); if (S_ISCHR(inode->i_mode)) return false; /* device-dax */ return true; } static inline int iocb_flags(struct file *file) { int res = 0; if (file->f_flags & O_APPEND) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; return res; } static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { int kiocb_flags = 0; /* make sure there's no overlap between RWF and private IOCB flags */ BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD); if (!flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; kiocb_flags |= IOCB_NOIO; } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; ki->ki_flags |= kiocb_flags; return 0; } static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; /* * Don't strictly need d_lock here? If the parent ino could change * then surely we'd have a deeper race in the caller? 
*/ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); return res; } /* Transaction based IO helpers */ /* * An argresp is stored in an allocated page and holds the * size of the argument or response, along with its content */ struct simple_transaction_argresp { ssize_t size; char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) char *simple_transaction_get(struct file *file, const char __user *buf, size_t size); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos); int simple_transaction_release(struct inode *inode, struct file *file); void simple_transaction_set(struct file *file, size_t n); /* * simple attribute files * * These attributes behave similar to those in sysfs: * * Writing to an attribute immediately sets a value, an open file can be * written to multiple times. * * Reading from an attribute creates a buffer from the value that might get * read with multiple read calls. When the attribute has been read * completely, no further read calls are possible until the file is opened * again. * * All attributes contain a text representation of a numeric value * that are accessed with the get() and set() functions. */ #define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = simple_attr_read, \ .write = (__is_signed) ? 
simple_attr_write_signed : simple_attr_write, \ .llseek = generic_file_llseek, \ } #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) #define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) { /* don't do anything, just let the compiler check the arguments; */ } int simple_attr_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt); int simple_attr_release(struct inode *inode, struct file *file); ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) static inline bool is_sxid(umode_t mode) { return mode & (S_ISUID | S_ISGID); } static inline int check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { if (!(dir->i_mode & S_ISVTX)) return 0; return __check_sticky(idmap, dir, inode); } static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC)) inode->i_flags |= S_NOSEC; } static inline bool is_root_inode(struct inode *inode) { return inode == inode->i_sb->s_root->d_inode; } static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { return ctx->actor(ctx, name, namelen, 
ctx->pos, ino, type); } static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, ".", 1, ctx->pos, file->f_path.dentry->d_inode->i_ino, DT_DIR); } static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) return false; ctx->pos = 1; } if (ctx->pos == 1) { if (!dir_emit_dotdot(file, ctx)) return false; ctx->pos = 2; } return true; } static inline bool dir_relax(struct inode *inode) { inode_unlock(inode); inode_lock(inode); return !IS_DEADDIR(inode); } static inline bool dir_relax_shared(struct inode *inode) { inode_unlock_shared(inode); inode_lock_shared(inode); return !IS_DEADDIR(inode); } extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); /* mm/fadvise.c */ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice); extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); #endif /* _LINUX_FS_H */ |
7954 4097 568 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RCULIST_BL_H #define _LINUX_RCULIST_BL_H /* * RCU-protected bl list version. See include/linux/list_bl.h. */ #include <linux/list_bl.h> #include <linux/rcupdate.h> /* * hlist_bl_set_first_rcu - publish @n as the first node of @h. * * Asserts that @n carries no lock bits and that the lock bits of h->first * are currently set, then stores @n with LIST_BL_LOCKMASK or-ed in using * rcu_assign_pointer(). */ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); rcu_assign_pointer(h->first, (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); } /* * hlist_bl_first_rcu - return the first node of @h with the lock bits masked off. * * h->first is read with rcu_dereference_check(), with hlist_bl_is_locked(h) * passed as the check condition. */ static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) { return (struct hlist_bl_node *) ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. * * Note: hlist_bl_unhashed() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the hash list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_bl_add_head_rcu() * or hlist_bl_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_bl_for_each_entry_rcu(). 
*/ static inline void hlist_bl_del_rcu(struct hlist_bl_node *n) { __hlist_bl_del(n); /* only the back pointer is poisoned; n->next is left alone for concurrent readers */ n->pprev = LIST_POISON2; } /** * hlist_bl_add_head_rcu - add an element at the head of an hlist_bl * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist_bl, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_bl_add_head_rcu() * or hlist_bl_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, struct hlist_bl_head *h) { struct hlist_bl_node *first; /* don't need hlist_bl_first_rcu because we're under lock */ first = hlist_bl_first(h); n->next = first; if (first) first->pprev = &n->next; n->pprev = &h->first; /* need _rcu because we can have concurrent lock free readers */ hlist_bl_set_first_rcu(h, n); } /** * hlist_bl_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_bl_node to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_bl_node within the struct. * */ #define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = hlist_bl_first_rcu(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(pos->next)) #endif /* _LINUX_RCULIST_BL_H */ |
33374 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | /* SPDX-License-Identifier: GPL-2.0+ */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rseq #if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RSEQ_H #include <linux/tracepoint.h> #include <linux/types.h> /* rseq_update: records the current CPU id, that CPU's node id and the mm_cid of task @t. */ TRACE_EVENT(rseq_update, TP_PROTO(struct task_struct *t), TP_ARGS(t), TP_STRUCT__entry( __field(s32, cpu_id) __field(s32, node_id) __field(s32, mm_cid) ), TP_fast_assign( __entry->cpu_id = raw_smp_processor_id(); __entry->node_id = cpu_to_node(__entry->cpu_id); __entry->mm_cid = task_mm_cid(t); ), TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id, __entry->node_id, __entry->mm_cid) ); /* rseq_ip_fixup: records the four values passed in (regs_ip, start_ip, post_commit_offset, abort_ip) unchanged. */ TRACE_EVENT(rseq_ip_fixup, TP_PROTO(unsigned long regs_ip, unsigned long start_ip, unsigned long post_commit_offset, unsigned long abort_ip), TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip), TP_STRUCT__entry( __field(unsigned long, regs_ip) __field(unsigned long, start_ip) __field(unsigned long, post_commit_offset) __field(unsigned long, abort_ip) ), TP_fast_assign( __entry->regs_ip = regs_ip; __entry->start_ip = start_ip; __entry->post_commit_offset = post_commit_offset; __entry->abort_ip = abort_ip; ), TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx", __entry->regs_ip, __entry->start_ip, __entry->post_commit_offset, __entry->abort_ip) ); #endif /* _TRACE_RSEQ_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
4 1 4 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 | /* * linux/fs/nls/nls_cp863.c * * Charset cp863 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. 
*/ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00c2, 0x00e0, 0x00b6, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x2017, 0x00c0, 0x00a7, /* 0x90*/ 0x00c9, 0x00c8, 0x00ca, 0x00f4, 0x00cb, 0x00cf, 0x00fb, 0x00f9, 0x00a4, 0x00d4, 0x00dc, 0x00a2, 0x00a3, 0x00d9, 0x00db, 0x0192, /* 0xa0*/ 0x00a6, 0x00b4, 0x00f3, 0x00fa, 0x00a8, 0x00b8, 0x00b3, 0x00af, 0x00ce, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00be, 0x00ab, 0x00bb, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 
0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, /* 0xf0*/ 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0x00, 0x9b, 0x9c, 0x98, 0x00, 0xa0, 0x8f, /* 0xa0-0xa7 */ 0xa4, 0x00, 0x00, 0xae, 0xaa, 0x00, 0x00, 0xa7, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0xa6, 0xa1, 0xe6, 0x86, 0xfa, /* 0xb0-0xb7 */ 0xa5, 0x00, 0x00, 0xaf, 0xac, 0xab, 0xad, 0x00, /* 0xb8-0xbf */ 0x8e, 0x00, 0x84, 0x00, 0x00, 0x00, 
0x00, 0x80, /* 0xc0-0xc7 */ 0x91, 0x90, 0x92, 0x94, 0x00, 0x00, 0xa8, 0x95, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x9d, 0x00, 0x9e, 0x9a, 0x00, 0x00, 0xe1, /* 0xd8-0xdf */ 0x85, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x87, /* 0xe0-0xe7 */ 0x8a, 0x82, 0x88, 0x89, 0x00, 0x00, 0x8c, 0x8b, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0xa2, 0x93, 0x00, 0x00, 0xf6, /* 0xf0-0xf7 */ 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0xe9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0xe8, 0x00, /* 0xa0-0xa7 */ 0x00, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0xe0, 0x00, 0x00, 0xeb, 0xee, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0xe3, 0x00, 0x00, 0xe5, 0xe7, 0x00, 0xed, 0x00, /* 0xc0-0xc7 */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8d, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 
0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0xec, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0xf0, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page23[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0xf4, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 
0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, page23, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x87, 0x81, 0x82, 0x83, 0x83, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x85, 0x8f, /* 0x88-0x8f */ 0x82, 0x8a, 0x88, 0x93, 0x89, 0x8b, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x93, 0x81, 0x9b, 0x9c, 0x97, 0x96, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0x8c, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0x00, 0xe3, 0xe5, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xed, 0x00, 0x00, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 
0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x9a, 0x90, 0x84, 0x84, 0x8e, 0x86, 0x80, /* 0x80-0x87 */ 0x92, 0x94, 0x91, 0x95, 0xa8, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x99, 0x94, 0x95, 0x9e, 0x9d, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0x00, 0x00, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x00, 0xe1, 0xe2, 0x00, 0xe4, 0xe4, 0x00, 0x00, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0x00, 0xec, 0xe8, 0x00, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int 
char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp863", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp863(void) { return register_nls(&table); } static void __exit exit_nls_cp863(void) { unregister_nls(&table); } module_init(init_nls_cp863) module_exit(exit_nls_cp863) MODULE_LICENSE("Dual BSD/GPL"); |
46 46 46 46 16 16 15 4 3 6 6 6 4 4 1 1 2 2 16 15 3 46 1 1 46 5 3 2 4 4 4 4 4 4 4 7 5 4 4 46 10 10 46 46 14 14 39 39 17 15 2 46 46 46 46 39 46 6 6 10 10 6 6 46 46 6 6 5 5 5 5 6 6 6 46 46 46 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 
480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 
980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 
1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 
1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 | /* * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ /* * Now we have all buffers that must be used in balancing of the tree * Further calculations can not cause schedule(), and thus the buffer * tree will be stable until the balancing will be finished * balance the tree according to the analysis made before, * and using buffers obtained after all above. */ #include <linux/uaccess.h> #include <linux/time.h> #include "reiserfs.h" #include <linux/buffer_head.h> #include <linux/kernel.h> static inline void buffer_info_init_left(struct tree_balance *tb, struct buffer_info *bi) { bi->tb = tb; bi->bi_bh = tb->L[0]; bi->bi_parent = tb->FL[0]; bi->bi_position = get_left_neighbor_position(tb, 0); } static inline void buffer_info_init_right(struct tree_balance *tb, struct buffer_info *bi) { bi->tb = tb; bi->bi_bh = tb->R[0]; bi->bi_parent = tb->FR[0]; bi->bi_position = get_right_neighbor_position(tb, 0); } static inline void buffer_info_init_tbS0(struct tree_balance *tb, struct buffer_info *bi) { bi->tb = tb; bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); } static inline void buffer_info_init_bh(struct tree_balance *tb, struct buffer_info *bi, struct buffer_head *bh) { bi->tb = tb; bi->bi_bh = bh; bi->bi_parent = NULL; bi->bi_position = 0; } inline void 
do_balance_mark_leaf_dirty(struct tree_balance *tb, struct buffer_head *bh, int flag) { journal_mark_dirty(tb->transaction_handle, bh); } #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty /* * summary: * if deleting something ( tb->insert_size[0] < 0 ) * return(balance_leaf_when_delete()); (flag d handled here) * else * if lnum is larger than 0 we put items into the left node * if rnum is larger than 0 we put items into the right node * if snum1 is larger than 0 we put items into the new node s1 * if snum2 is larger than 0 we put items into the new node s2 * Note that all *num* count new items being created. */ static void balance_leaf_when_delete_del(struct tree_balance *tb) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int item_pos = PATH_LAST_POSITION(tb->tb_path); struct buffer_info bi; #ifdef CONFIG_REISERFS_CHECK struct item_head *ih = item_head(tbS0, item_pos); #endif RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], "vs-12013: mode Delete, insert size %d, ih to be deleted %h", -tb->insert_size[0], ih); buffer_info_init_tbS0(tb, &bi); leaf_delete_items(&bi, 0, item_pos, 1, -1); if (!item_pos && tb->CFL[0]) { if (B_NR_ITEMS(tbS0)) { replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); } else { if (!PATH_H_POSITION(tb->tb_path, 1)) replace_key(tb, tb->CFL[0], tb->lkey[0], PATH_H_PPARENT(tb->tb_path, 0), 0); } } RFALSE(!item_pos && !tb->CFL[0], "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]); } /* cut item in S[0] */ static void balance_leaf_when_delete_cut(struct tree_balance *tb) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int item_pos = PATH_LAST_POSITION(tb->tb_path); struct item_head *ih = item_head(tbS0, item_pos); int pos_in_item = tb->tb_path->pos_in_item; struct buffer_info bi; buffer_info_init_tbS0(tb, &bi); if (is_direntry_le_ih(ih)) { /* * UFS unlink semantics are such that you can only * delete one directory entry at a 
time. * * when we cut a directory tb->insert_size[0] means * number of entries to be cut (always 1) */ tb->insert_size[0] = -1; leaf_cut_from_buffer(&bi, item_pos, pos_in_item, -tb->insert_size[0]); RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], "PAP-12030: can not change delimiting key. CFL[0]=%p", tb->CFL[0]); if (!item_pos && !pos_in_item && tb->CFL[0]) replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); } else { leaf_cut_from_buffer(&bi, item_pos, pos_in_item, -tb->insert_size[0]); RFALSE(!ih_item_len(ih), "PAP-12035: cut must leave non-zero dynamic " "length of item"); } } static int balance_leaf_when_delete_left(struct tree_balance *tb) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); /* L[0] must be joined with S[0] */ if (tb->lnum[0] == -1) { /* R[0] must be also joined with S[0] */ if (tb->rnum[0] == -1) { if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { /* * all contents of all the * 3 buffers will be in L[0] */ if (PATH_H_POSITION(tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0])) replace_key(tb, tb->CFL[0], tb->lkey[0], tb->FR[0], 1); leaf_move_items(LEAF_FROM_S_TO_L, tb, n, -1, NULL); leaf_move_items(LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL); reiserfs_invalidate_buffer(tb, tbS0); reiserfs_invalidate_buffer(tb, tb->R[0]); return 0; } /* all contents of all the 3 buffers will be in R[0] */ leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, NULL); leaf_move_items(LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL); /* right_delimiting_key is correct in R[0] */ replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); reiserfs_invalidate_buffer(tb, tbS0); reiserfs_invalidate_buffer(tb, tb->L[0]); return -1; } RFALSE(tb->rnum[0] != 0, "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); /* all contents of L[0] and S[0] will be in L[0] */ leaf_shift_left(tb, n, -1); reiserfs_invalidate_buffer(tb, tbS0); return 0; } /* * a part of contents of S[0] will be in L[0] and * the rest part of S[0] will be in R[0] */ 
RFALSE((tb->lnum[0] + tb->rnum[0] < n) || (tb->lnum[0] + tb->rnum[0] > n + 1), "PAP-12050: rnum(%d) and lnum(%d) and item " "number(%d) in S[0] are not consistent", tb->rnum[0], tb->lnum[0], n); RFALSE((tb->lnum[0] + tb->rnum[0] == n) && (tb->lbytes != -1 || tb->rbytes != -1), "PAP-12055: bad rbytes (%d)/lbytes (%d) " "parameters when items are not split", tb->rbytes, tb->lbytes); RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && (tb->lbytes < 1 || tb->rbytes != -1), "PAP-12060: bad rbytes (%d)/lbytes (%d) " "parameters when items are split", tb->rbytes, tb->lbytes); leaf_shift_left(tb, tb->lnum[0], tb->lbytes); leaf_shift_right(tb, tb->rnum[0], tb->rbytes); reiserfs_invalidate_buffer(tb, tbS0); return 0; } /* * Balance leaf node in case of delete or cut: insert_size[0] < 0 * * lnum, rnum can have values >= -1 * -1 means that the neighbor must be joined with S * 0 means that nothing should be done with the neighbor * >0 means to shift entirely or partly the specified number of items * to the neighbor */ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; int n; RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, "vs- 12000: level: wrong FR %z", tb->FR[0]); RFALSE(tb->blknum[0] > 1, "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), "PAP-12010: tree can not be empty"); buffer_info_init_tbS0(tb, &bi); /* Delete or truncate the item */ BUG_ON(flag != M_DELETE && flag != M_CUT); if (flag == M_DELETE) balance_leaf_when_delete_del(tb); else /* M_CUT */ balance_leaf_when_delete_cut(tb); /* * the rule is that no shifting occurs unless by shifting * a node can be freed */ n = B_NR_ITEMS(tbS0); /* L[0] takes part in balancing */ if (tb->lnum[0]) return balance_leaf_when_delete_left(tb); if (tb->rnum[0] == -1) { /* all contents of R[0] and S[0] will be in R[0] */ leaf_shift_right(tb, n, -1); 
reiserfs_invalidate_buffer(tb, tbS0); return 0; } RFALSE(tb->rnum[0], "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); return 0; } static unsigned int balance_leaf_insert_left(struct tree_balance *tb, struct item_head *const ih, const char * const body) { int ret; struct buffer_info bi; int n = B_NR_ITEMS(tb->L[0]); unsigned body_shift_bytes = 0; if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { /* part of new item falls into L[0] */ int new_item_len, shift; ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1); /* Calculate item length to insert to S[0] */ new_item_len = ih_item_len(ih) - tb->lbytes; /* Calculate and check item length to insert to L[0] */ put_ih_item_len(ih, ih_item_len(ih) - new_item_len); RFALSE(ih_item_len(ih) <= 0, "PAP-12080: there is nothing to insert into L[0]: " "ih_item_len=%d", ih_item_len(ih)); /* Insert new item into L[0] */ buffer_info_init_left(tb, &bi); leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body, min_t(int, tb->zeroes_num, ih_item_len(ih))); /* * Calculate key component, item length and body to * insert into S[0] */ shift = 0; if (is_indirect_le_ih(ih)) shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; add_le_ih_k_offset(ih, tb->lbytes << shift); put_ih_item_len(ih, new_item_len); if (tb->lbytes > tb->zeroes_num) { body_shift_bytes = tb->lbytes - tb->zeroes_num; tb->zeroes_num = 0; } else tb->zeroes_num -= tb->lbytes; RFALSE(ih_item_len(ih) <= 0, "PAP-12085: there is nothing to insert into S[0]: " "ih_item_len=%d", ih_item_len(ih)); } else { /* new item in whole falls into L[0] */ /* Shift lnum[0]-1 items to L[0] */ ret = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); /* Insert new item into L[0] */ buffer_info_init_left(tb, &bi); leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body, tb->zeroes_num); tb->insert_size[0] = 0; tb->zeroes_num = 0; } return body_shift_bytes; } static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb, struct item_head * const ih, const char * const 
body) { int n = B_NR_ITEMS(tb->L[0]); struct buffer_info bi; RFALSE(tb->zeroes_num, "PAP-12090: invalid parameter in case of a directory"); /* directory item */ if (tb->lbytes > tb->pos_in_item) { /* new directory entry falls into L[0] */ struct item_head *pasted; int ret, l_pos_in_item = tb->pos_in_item; /* * Shift lnum[0] - 1 items in whole. * Shift lbytes - 1 entries from given directory item */ ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); if (ret && !tb->item_pos) { pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes - 1); } /* Append given directory entry to directory item */ buffer_info_init_left(tb, &bi); leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, l_pos_in_item, tb->insert_size[0], body, tb->zeroes_num); /* * previous string prepared space for pasting new entry, * following string pastes this entry */ /* * when we have merge directory item, pos_in_item * has been changed too */ /* paste new directory entry. 1 is entry number */ leaf_paste_entries(&bi, n + tb->item_pos - ret, l_pos_in_item, 1, (struct reiserfs_de_head *) body, body + DEH_SIZE, tb->insert_size[0]); tb->insert_size[0] = 0; } else { /* new directory item doesn't fall into L[0] */ /* * Shift lnum[0]-1 items in whole. Shift lbytes * directory entries from directory item number lnum[0] */ leaf_shift_left(tb, tb->lnum[0], tb->lbytes); } /* Calculate new position to append in item body */ tb->pos_in_item -= tb->lbytes; } static unsigned int balance_leaf_paste_left_shift(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tb->L[0]); struct buffer_info bi; int body_shift_bytes = 0; if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { balance_leaf_paste_left_shift_dirent(tb, ih, body); return 0; } RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. 
" "lbytes=%d", tb->lbytes); RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)), "PAP-12100: incorrect position to paste: " "item_len=%d, pos_in_item=%d", ih_item_len(item_head(tbS0, tb->item_pos)), tb->pos_in_item); /* appended item will be in L[0] in whole */ if (tb->lbytes >= tb->pos_in_item) { struct item_head *tbS0_pos_ih, *tbL0_ih; struct item_head *tbS0_0_ih; struct reiserfs_key *left_delim_key; int ret, l_n, version, temp_l; tbS0_pos_ih = item_head(tbS0, tb->item_pos); tbS0_0_ih = item_head(tbS0, 0); /* * this bytes number must be appended * to the last item of L[h] */ l_n = tb->lbytes - tb->pos_in_item; /* Calculate new insert_size[0] */ tb->insert_size[0] -= l_n; RFALSE(tb->insert_size[0] <= 0, "PAP-12105: there is nothing to paste into " "L[0]. insert_size=%d", tb->insert_size[0]); ret = leaf_shift_left(tb, tb->lnum[0], ih_item_len(tbS0_pos_ih)); tbL0_ih = item_head(tb->L[0], n + tb->item_pos - ret); /* Append to body of item in L[0] */ buffer_info_init_left(tb, &bi); leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, ih_item_len(tbL0_ih), l_n, body, min_t(int, l_n, tb->zeroes_num)); /* * 0-th item in S0 can be only of DIRECT type * when l_n != 0 */ temp_l = l_n; RFALSE(ih_item_len(tbS0_0_ih), "PAP-12106: item length must be 0"); RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key, leaf_key(tb->L[0], n + tb->item_pos - ret)), "PAP-12107: items must be of the same file"); if (is_indirect_le_ih(tbL0_ih)) { int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; temp_l = l_n << shift; } /* update key of first item in S0 */ version = ih_version(tbS0_0_ih); add_le_key_k_offset(version, &tbS0_0_ih->ih_key, temp_l); /* update left delimiting key */ left_delim_key = internal_key(tb->CFL[0], tb->lkey[0]); add_le_key_k_offset(version, left_delim_key, temp_l); /* * Calculate new body, position in item and * insert_size[0] */ if (l_n > tb->zeroes_num) { body_shift_bytes = l_n - tb->zeroes_num; tb->zeroes_num = 0; } else tb->zeroes_num -= l_n; 
tb->pos_in_item = 0; RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key, leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1)) || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size) || !op_is_left_mergeable(left_delim_key, tbS0->b_size), "PAP-12120: item must be merge-able with left " "neighboring item"); } else { /* only part of the appended item will be in L[0] */ /* Calculate position in item for append in S[0] */ tb->pos_in_item -= tb->lbytes; RFALSE(tb->pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", tb->pos_in_item); /* * Shift lnum[0] - 1 items in whole. * Shift lbytes - 1 byte from item number lnum[0] */ leaf_shift_left(tb, tb->lnum[0], tb->lbytes); } return body_shift_bytes; } /* appended item will be in L[0] in whole */ static void balance_leaf_paste_left_whole(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tb->L[0]); struct buffer_info bi; struct item_head *pasted; int ret; /* if we paste into first item of S[0] and it is left mergable */ if (!tb->item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) { /* * then increment pos_in_item by the size of the * last item in L[0] */ pasted = item_head(tb->L[0], n - 1); if (is_direntry_le_ih(pasted)) tb->pos_in_item += ih_entry_count(pasted); else tb->pos_in_item += ih_item_len(pasted); } /* * Shift lnum[0] - 1 items in whole. 
* Shift lbytes - 1 byte from item number lnum[0] */ ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); /* Append to body of item in L[0] */ buffer_info_init_left(tb, &bi); leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, tb->pos_in_item, tb->insert_size[0], body, tb->zeroes_num); /* if appended item is directory, paste entry */ pasted = item_head(tb->L[0], n + tb->item_pos - ret); if (is_direntry_le_ih(pasted)) leaf_paste_entries(&bi, n + tb->item_pos - ret, tb->pos_in_item, 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]); /* * if appended item is indirect item, put unformatted node * into un list */ if (is_indirect_le_ih(pasted)) set_ih_free_space(pasted, 0); tb->insert_size[0] = 0; tb->zeroes_num = 0; } static unsigned int balance_leaf_paste_left(struct tree_balance *tb, struct item_head * const ih, const char * const body) { /* we must shift the part of the appended item */ if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) return balance_leaf_paste_left_shift(tb, ih, body); else balance_leaf_paste_left_whole(tb, ih, body); return 0; } /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ static unsigned int balance_leaf_left(struct tree_balance *tb, struct item_head * const ih, const char * const body, int flag) { if (tb->lnum[0] <= 0) return 0; /* new item or it part falls to L[0], shift it too */ if (tb->item_pos < tb->lnum[0]) { BUG_ON(flag != M_INSERT && flag != M_PASTE); if (flag == M_INSERT) return balance_leaf_insert_left(tb, ih, body); else /* M_PASTE */ return balance_leaf_paste_left(tb, ih, body); } else /* new item doesn't fall into L[0] */ leaf_shift_left(tb, tb->lnum[0], tb->lbytes); return 0; } static void balance_leaf_insert_right(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); struct buffer_info bi; /* new item or part of it doesn't fall into R[0] */ if (n - tb->rnum[0] >= tb->item_pos) { 
leaf_shift_right(tb, tb->rnum[0], tb->rbytes); return; } /* new item or its part falls to R[0] */ /* part of new item falls into R[0] */ if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { loff_t old_key_comp, old_len, r_zeroes_number; const char *r_body; int shift; loff_t offset; leaf_shift_right(tb, tb->rnum[0] - 1, -1); /* Remember key component and item length */ old_key_comp = le_ih_k_offset(ih); old_len = ih_item_len(ih); /* * Calculate key component and item length to insert * into R[0] */ shift = 0; if (is_indirect_le_ih(ih)) shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << shift); set_le_ih_k_offset(ih, offset); put_ih_item_len(ih, tb->rbytes); /* Insert part of the item into R[0] */ buffer_info_init_right(tb, &bi); if ((old_len - tb->rbytes) > tb->zeroes_num) { r_zeroes_number = 0; r_body = body + (old_len - tb->rbytes) - tb->zeroes_num; } else { r_body = body; r_zeroes_number = tb->zeroes_num - (old_len - tb->rbytes); tb->zeroes_num -= r_zeroes_number; } leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number); /* Replace right delimiting key by first key in R[0] */ replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); /* * Calculate key component and item length to * insert into S[0] */ set_le_ih_k_offset(ih, old_key_comp); put_ih_item_len(ih, old_len - tb->rbytes); tb->insert_size[0] -= tb->rbytes; } else { /* whole new item falls into R[0] */ /* Shift rnum[0]-1 items to R[0] */ leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); /* Insert new item into R[0] */ buffer_info_init_right(tb, &bi); leaf_insert_into_buf(&bi, tb->item_pos - n + tb->rnum[0] - 1, ih, body, tb->zeroes_num); if (tb->item_pos - n + tb->rnum[0] - 1 == 0) replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); tb->zeroes_num = tb->insert_size[0] = 0; } } static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = 
PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; int entry_count; RFALSE(tb->zeroes_num, "PAP-12145: invalid parameter in case of a directory"); entry_count = ih_entry_count(item_head(tbS0, tb->item_pos)); /* new directory entry falls into R[0] */ if (entry_count - tb->rbytes < tb->pos_in_item) { int paste_entry_position; RFALSE(tb->rbytes - 1 >= entry_count || !tb->insert_size[0], "PAP-12150: no enough of entries to shift to R[0]: " "rbytes=%d, entry_count=%d", tb->rbytes, entry_count); /* * Shift rnum[0]-1 items in whole. * Shift rbytes-1 directory entries from directory * item number rnum[0] */ leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); /* Paste given directory entry to directory item */ paste_entry_position = tb->pos_in_item - entry_count + tb->rbytes - 1; buffer_info_init_right(tb, &bi); leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, tb->zeroes_num); /* paste entry */ leaf_paste_entries(&bi, 0, paste_entry_position, 1, (struct reiserfs_de_head *) body, body + DEH_SIZE, tb->insert_size[0]); /* change delimiting keys */ if (paste_entry_position == 0) replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); tb->insert_size[0] = 0; tb->pos_in_item++; } else { /* new directory entry doesn't fall into R[0] */ leaf_shift_right(tb, tb->rnum[0], tb->rbytes); } } static void balance_leaf_paste_right_shift(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n_shift, n_rem, r_zeroes_number, version; unsigned long temp_rem; const char *r_body; struct buffer_info bi; /* we append to directory item */ if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { balance_leaf_paste_right_shift_dirent(tb, ih, body); return; } /* regular object */ /* * Calculate number of bytes which must be shifted * from appended item */ n_shift = tb->rbytes - tb->insert_size[0]; if (n_shift < 0) n_shift = 0; RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, 
tb->item_pos)), "PAP-12155: invalid position to paste. ih_item_len=%d, " "pos_in_item=%d", tb->pos_in_item, ih_item_len(item_head(tbS0, tb->item_pos))); leaf_shift_right(tb, tb->rnum[0], n_shift); /* * Calculate number of bytes which must remain in body * after appending to R[0] */ n_rem = tb->insert_size[0] - tb->rbytes; if (n_rem < 0) n_rem = 0; temp_rem = n_rem; version = ih_version(item_head(tb->R[0], 0)); if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) { int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; temp_rem = n_rem << shift; } add_le_key_k_offset(version, leaf_key(tb->R[0], 0), temp_rem); add_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]), temp_rem); do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); /* Append part of body into R[0] */ buffer_info_init_right(tb, &bi); if (n_rem > tb->zeroes_num) { r_zeroes_number = 0; r_body = body + n_rem - tb->zeroes_num; } else { r_body = body; r_zeroes_number = tb->zeroes_num - n_rem; tb->zeroes_num -= r_zeroes_number; } leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeroes_number); if (is_indirect_le_ih(item_head(tb->R[0], 0))) set_ih_free_space(item_head(tb->R[0], 0), 0); tb->insert_size[0] = n_rem; if (!n_rem) tb->pos_in_item++; } static void balance_leaf_paste_right_whole(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); struct item_head *pasted; struct buffer_info bi; buffer_info_init_right(tb, &bi); leaf_shift_right(tb, tb->rnum[0], tb->rbytes); /* append item in R[0] */ if (tb->pos_in_item >= 0) { buffer_info_init_right(tb, &bi); leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->rnum[0], tb->pos_in_item, tb->insert_size[0], body, tb->zeroes_num); } /* paste new entry, if item is directory item */ pasted = item_head(tb->R[0], tb->item_pos - n + tb->rnum[0]); if (is_direntry_le_ih(pasted) && tb->pos_in_item >= 0) { leaf_paste_entries(&bi, 
tb->item_pos - n + tb->rnum[0], tb->pos_in_item, 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]); if (!tb->pos_in_item) { RFALSE(tb->item_pos - n + tb->rnum[0], "PAP-12165: directory item must be first " "item of node when pasting is in 0th position"); /* update delimiting keys */ replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); } } if (is_indirect_le_ih(pasted)) set_ih_free_space(pasted, 0); tb->zeroes_num = tb->insert_size[0] = 0; } static void balance_leaf_paste_right(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); /* new item doesn't fall into R[0] */ if (n - tb->rnum[0] > tb->item_pos) { leaf_shift_right(tb, tb->rnum[0], tb->rbytes); return; } /* pasted item or part of it falls to R[0] */ if (tb->item_pos == n - tb->rnum[0] && tb->rbytes != -1) /* we must shift the part of the appended item */ balance_leaf_paste_right_shift(tb, ih, body); else /* pasted item in whole falls into R[0] */ balance_leaf_paste_right_whole(tb, ih, body); } /* shift rnum[0] items from S[0] to the right neighbor R[0] */ static void balance_leaf_right(struct tree_balance *tb, struct item_head * const ih, const char * const body, int flag) { if (tb->rnum[0] <= 0) return; BUG_ON(flag != M_INSERT && flag != M_PASTE); if (flag == M_INSERT) balance_leaf_insert_right(tb, ih, body); else /* M_PASTE */ balance_leaf_paste_right(tb, ih, body); } static void balance_leaf_new_nodes_insert(struct tree_balance *tb, struct item_head * const ih, const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); struct buffer_info bi; int shift; /* new item or it part don't falls into S_new[i] */ if (n - tb->snum[i] >= tb->item_pos) { leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i], tb->S_new[i]); return; } /* new item or 
it's part falls to first new node S_new[i] */ /* part of new item falls into S_new[i] */ if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) { int old_key_comp, old_len, r_zeroes_number; const char *r_body; /* Move snum[i]-1 items from S[0] to S_new[i] */ leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1, tb->S_new[i]); /* Remember key component and item length */ old_key_comp = le_ih_k_offset(ih); old_len = ih_item_len(ih); /* * Calculate key component and item length to insert * into S_new[i] */ shift = 0; if (is_indirect_le_ih(ih)) shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; set_le_ih_k_offset(ih, le_ih_k_offset(ih) + ((old_len - tb->sbytes[i]) << shift)); put_ih_item_len(ih, tb->sbytes[i]); /* Insert part of the item into S_new[i] before 0-th item */ buffer_info_init_bh(tb, &bi, tb->S_new[i]); if ((old_len - tb->sbytes[i]) > tb->zeroes_num) { r_zeroes_number = 0; r_body = body + (old_len - tb->sbytes[i]) - tb->zeroes_num; } else { r_body = body; r_zeroes_number = tb->zeroes_num - (old_len - tb->sbytes[i]); tb->zeroes_num -= r_zeroes_number; } leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number); /* * Calculate key component and item length to * insert into S[i] */ set_le_ih_k_offset(ih, old_key_comp); put_ih_item_len(ih, old_len - tb->sbytes[i]); tb->insert_size[0] -= tb->sbytes[i]; } else { /* whole new item falls into S_new[i] */ /* * Shift snum[0] - 1 items to S_new[i] * (sbytes[i] of split item) */ leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, tb->sbytes[i], tb->S_new[i]); /* Insert new item into S_new[i] */ buffer_info_init_bh(tb, &bi, tb->S_new[i]); leaf_insert_into_buf(&bi, tb->item_pos - n + tb->snum[i] - 1, ih, body, tb->zeroes_num); tb->zeroes_num = tb->insert_size[0] = 0; } } /* we append to directory item */ static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb, struct item_head * const ih, const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) 
{ struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct item_head *aux_ih = item_head(tbS0, tb->item_pos); int entry_count = ih_entry_count(aux_ih); struct buffer_info bi; if (entry_count - tb->sbytes[i] < tb->pos_in_item && tb->pos_in_item <= entry_count) { /* new directory entry falls into S_new[i] */ RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0"); RFALSE(tb->sbytes[i] - 1 >= entry_count, "PAP-12220: there are no so much entries (%d), only %d", tb->sbytes[i] - 1, entry_count); /* * Shift snum[i]-1 items in whole. * Shift sbytes[i] directory entries * from directory item number snum[i] */ leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i] - 1, tb->S_new[i]); /* * Paste given directory entry to * directory item */ buffer_info_init_bh(tb, &bi, tb->S_new[i]); leaf_paste_in_buffer(&bi, 0, tb->pos_in_item - entry_count + tb->sbytes[i] - 1, tb->insert_size[0], body, tb->zeroes_num); /* paste new directory entry */ leaf_paste_entries(&bi, 0, tb->pos_in_item - entry_count + tb->sbytes[i] - 1, 1, (struct reiserfs_de_head *) body, body + DEH_SIZE, tb->insert_size[0]); tb->insert_size[0] = 0; tb->pos_in_item++; } else { /* new directory entry doesn't fall into S_new[i] */ leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i], tb->S_new[i]); } } static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb, struct item_head * const ih, const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct item_head *aux_ih = item_head(tbS0, tb->item_pos); int n_shift, n_rem, r_zeroes_number, shift; const char *r_body; struct item_head *tmp; struct buffer_info bi; RFALSE(ih, "PAP-12210: ih must be 0"); if (is_direntry_le_ih(aux_ih)) { balance_leaf_new_nodes_paste_dirent(tb, ih, body, insert_key, insert_ptr, i); return; } /* regular object */ RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)) || 
tb->insert_size[0] <= 0, "PAP-12225: item too short or insert_size <= 0"); /* * Calculate number of bytes which must be shifted from appended item */ n_shift = tb->sbytes[i] - tb->insert_size[0]; if (n_shift < 0) n_shift = 0; leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], n_shift, tb->S_new[i]); /* * Calculate number of bytes which must remain in body after * append to S_new[i] */ n_rem = tb->insert_size[0] - tb->sbytes[i]; if (n_rem < 0) n_rem = 0; /* Append part of body into S_new[0] */ buffer_info_init_bh(tb, &bi, tb->S_new[i]); if (n_rem > tb->zeroes_num) { r_zeroes_number = 0; r_body = body + n_rem - tb->zeroes_num; } else { r_body = body; r_zeroes_number = tb->zeroes_num - n_rem; tb->zeroes_num -= r_zeroes_number; } leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeroes_number); tmp = item_head(tb->S_new[i], 0); shift = 0; if (is_indirect_le_ih(tmp)) { set_ih_free_space(tmp, 0); shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; } add_le_ih_k_offset(tmp, n_rem << shift); tb->insert_size[0] = n_rem; if (!n_rem) tb->pos_in_item++; } static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb, struct item_head * const ih, const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); int leaf_mi; struct item_head *pasted; struct buffer_info bi; #ifdef CONFIG_REISERFS_CHECK struct item_head *ih_check = item_head(tbS0, tb->item_pos); if (!is_direntry_le_ih(ih_check) && (tb->pos_in_item != ih_item_len(ih_check) || tb->insert_size[0] <= 0)) reiserfs_panic(tb->tb_sb, "PAP-12235", "pos_in_item must be equal to ih_item_len"); #endif leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i], tb->S_new[i]); RFALSE(leaf_mi, "PAP-12240: unexpected value returned by leaf_move_items (%d)", leaf_mi); /* paste into item */ buffer_info_init_bh(tb, &bi, tb->S_new[i]); leaf_paste_in_buffer(&bi, 
tb->item_pos - n + tb->snum[i], tb->pos_in_item, tb->insert_size[0], body, tb->zeroes_num); pasted = item_head(tb->S_new[i], tb->item_pos - n + tb->snum[i]); if (is_direntry_le_ih(pasted)) leaf_paste_entries(&bi, tb->item_pos - n + tb->snum[i], tb->pos_in_item, 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]); /* if we paste to indirect item update ih_free_space */ if (is_indirect_le_ih(pasted)) set_ih_free_space(pasted, 0); tb->zeroes_num = tb->insert_size[0] = 0; } static void balance_leaf_new_nodes_paste(struct tree_balance *tb, struct item_head * const ih, const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); /* pasted item doesn't fall into S_new[i] */ if (n - tb->snum[i] > tb->item_pos) { leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i], tb->S_new[i]); return; } /* pasted item or part if it falls to S_new[i] */ if (tb->item_pos == n - tb->snum[i] && tb->sbytes[i] != -1) /* we must shift part of the appended item */ balance_leaf_new_nodes_paste_shift(tb, ih, body, insert_key, insert_ptr, i); else /* item falls wholly into S_new[i] */ balance_leaf_new_nodes_paste_whole(tb, ih, body, insert_key, insert_ptr, i); } /* Fill new nodes that appear in place of S[0] */ static void balance_leaf_new_nodes(struct tree_balance *tb, struct item_head * const ih, const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int flag) { int i; for (i = tb->blknum[0] - 2; i >= 0; i--) { BUG_ON(flag != M_INSERT && flag != M_PASTE); RFALSE(!tb->snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, tb->snum[i]); /* here we shift from S to S_new nodes */ tb->S_new[i] = get_FEB(tb); /* initialized block type and tree level */ set_blkh_level(B_BLK_HEAD(tb->S_new[i]), DISK_LEAF_NODE_LEVEL); if (flag == M_INSERT) balance_leaf_new_nodes_insert(tb, ih, body, insert_key, insert_ptr, i); else /* M_PASTE */ balance_leaf_new_nodes_paste(tb, ih, body, insert_key, insert_ptr, i); memcpy(insert_key + i, leaf_key(tb->S_new[i], 0), KEY_SIZE); insert_ptr[i] = tb->S_new[i]; RFALSE(!buffer_journaled(tb->S_new[i]) || buffer_journal_dirty(tb->S_new[i]) || buffer_dirty(tb->S_new[i]), "PAP-12247: S_new[%d] : (%b)", i, tb->S_new[i]); } } static void balance_leaf_finish_node_insert(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; buffer_info_init_tbS0(tb, &bi); leaf_insert_into_buf(&bi, tb->item_pos, ih, body, tb->zeroes_num); /* If we insert the first key change the delimiting key */ if (tb->item_pos == 0) { if (tb->CFL[0]) /* can be 0 in reiserfsck */ replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); } } static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct item_head *pasted = item_head(tbS0, tb->item_pos); struct buffer_info bi; if (tb->pos_in_item >= 0 && tb->pos_in_item <= ih_entry_count(pasted)) { RFALSE(!tb->insert_size[0], "PAP-12260: insert_size is 0 already"); /* prepare space */ buffer_info_init_tbS0(tb, &bi); leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item, tb->insert_size[0], body, tb->zeroes_num); /* paste entry */ leaf_paste_entries(&bi, tb->item_pos, tb->pos_in_item, 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]); if (!tb->item_pos && !tb->pos_in_item) { RFALSE(!tb->CFL[0] || !tb->L[0], "PAP-12270: CFL[0]/L[0] must be specified"); if (tb->CFL[0]) replace_key(tb, 
tb->CFL[0], tb->lkey[0], tbS0, 0); } tb->insert_size[0] = 0; } } static void balance_leaf_finish_node_paste(struct tree_balance *tb, struct item_head * const ih, const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; struct item_head *pasted = item_head(tbS0, tb->item_pos); /* when directory, may be new entry already pasted */ if (is_direntry_le_ih(pasted)) { balance_leaf_finish_node_paste_dirent(tb, ih, body); return; } /* regular object */ if (tb->pos_in_item == ih_item_len(pasted)) { RFALSE(tb->insert_size[0] <= 0, "PAP-12275: insert size must not be %d", tb->insert_size[0]); buffer_info_init_tbS0(tb, &bi); leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item, tb->insert_size[0], body, tb->zeroes_num); if (is_indirect_le_ih(pasted)) set_ih_free_space(pasted, 0); tb->insert_size[0] = 0; } #ifdef CONFIG_REISERFS_CHECK else if (tb->insert_size[0]) { print_cur_tb("12285"); reiserfs_panic(tb->tb_sb, "PAP-12285", "insert_size must be 0 (%d)", tb->insert_size[0]); } #endif } /* * if the affected item was not wholly shifted then we * perform all necessary operations on that part or whole * of the affected item which remains in S */ static void balance_leaf_finish_node(struct tree_balance *tb, struct item_head * const ih, const char * const body, int flag) { /* if we must insert or append into buffer S[0] */ if (0 <= tb->item_pos && tb->item_pos < tb->s0num) { if (flag == M_INSERT) balance_leaf_finish_node_insert(tb, ih, body); else /* M_PASTE */ balance_leaf_finish_node_paste(tb, ih, body); } } /** * balance_leaf - reiserfs tree balancing algorithm * @tb: tree balance state * @ih: item header of inserted item (little endian) * @body: body of inserted item or bytes to paste * @flag: i - insert, d - delete, c - cut, p - paste (see do_balance) * passed back: * @insert_key: key to insert new nodes * @insert_ptr: array of nodes to insert at the next level * * In our processing of one level we sometimes determine what 
must be * inserted into the next higher level. This insertion consists of a * key or two keys and their corresponding pointers. */ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, const char *body, int flag, struct item_head *insert_key, struct buffer_head **insert_ptr) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); PROC_INFO_INC(tb->tb_sb, balance_at[0]); /* Make balance in case insert_size[0] < 0 */ if (tb->insert_size[0] < 0) return balance_leaf_when_delete(tb, flag); tb->item_pos = PATH_LAST_POSITION(tb->tb_path), tb->pos_in_item = tb->tb_path->pos_in_item, tb->zeroes_num = 0; if (flag == M_INSERT && !body) tb->zeroes_num = ih_item_len(ih); /* * for indirect item pos_in_item is measured in unformatted node * pointers. Recalculate to bytes */ if (flag != M_INSERT && is_indirect_le_ih(item_head(tbS0, tb->item_pos))) tb->pos_in_item *= UNFM_P_SIZE; body += balance_leaf_left(tb, ih, body, flag); /* tb->lnum[0] > 0 */ /* Calculate new item position */ tb->item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); balance_leaf_right(tb, ih, body, flag); /* tb->rnum[0] > 0 */ RFALSE(tb->blknum[0] > 3, "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); RFALSE(tb->blknum[0] < 0, "PAP-12185: blknum can not be %d. 
It must be >= 0", tb->blknum[0]); /* * if while adding to a node we discover that it is possible to split * it in two, and merge the left part into the left neighbor and the * right part into the right neighbor, eliminating the node */ if (tb->blknum[0] == 0) { /* node S[0] is empty now */ RFALSE(!tb->lnum[0] || !tb->rnum[0], "PAP-12190: lnum and rnum must not be zero"); /* * if insertion was done before 0-th position in R[0], right * delimiting key of the tb->L[0]'s and left delimiting key are * not set correctly */ if (tb->CFL[0]) { if (!tb->CFR[0]) reiserfs_panic(tb->tb_sb, "vs-12195", "CFR not initialized"); copy_key(internal_key(tb->CFL[0], tb->lkey[0]), internal_key(tb->CFR[0], tb->rkey[0])); do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); } reiserfs_invalidate_buffer(tb, tbS0); return 0; } balance_leaf_new_nodes(tb, ih, body, insert_key, insert_ptr, flag); balance_leaf_finish_node(tb, ih, body, flag); #ifdef CONFIG_REISERFS_CHECK if (flag == M_PASTE && tb->insert_size[0]) { print_cur_tb("12290"); reiserfs_panic(tb->tb_sb, "PAP-12290", "insert_size is still not 0 (%d)", tb->insert_size[0]); } #endif /* Leaf level of the tree is balanced (end of balance_leaf) */ return 0; } /* Make empty node */ void make_empty_node(struct buffer_info *bi) { struct block_head *blkh; RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); blkh = B_BLK_HEAD(bi->bi_bh); set_blkh_nr_item(blkh, 0); set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh)); if (bi->bi_parent) B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ } /* Get first empty buffer */ struct buffer_head *get_FEB(struct tree_balance *tb) { int i; struct buffer_info bi; for (i = 0; i < MAX_FEB_SIZE; i++) if (tb->FEB[i] != NULL) break; if (i == MAX_FEB_SIZE) reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty"); buffer_info_init_bh(tb, &bi, tb->FEB[i]); make_empty_node(&bi); set_buffer_uptodate(tb->FEB[i]); tb->used[i] = tb->FEB[i]; tb->FEB[i] = NULL; return 
tb->used[i]; } /* This is now used because reiserfs_free_block has to be able to schedule. */ static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) { int i; if (buffer_dirty(bh)) reiserfs_warning(tb->tb_sb, "reiserfs-12320", "called with dirty buffer"); for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) if (!tb->thrown[i]) { tb->thrown[i] = bh; get_bh(bh); /* free_thrown puts this */ return; } reiserfs_warning(tb->tb_sb, "reiserfs-12321", "too many thrown buffers"); } static void free_thrown(struct tree_balance *tb) { int i; b_blocknr_t blocknr; for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) { if (tb->thrown[i]) { blocknr = tb->thrown[i]->b_blocknr; if (buffer_dirty(tb->thrown[i])) reiserfs_warning(tb->tb_sb, "reiserfs-12322", "called with dirty buffer %d", blocknr); brelse(tb->thrown[i]); /* incremented in store_thrown */ reiserfs_free_block(tb->transaction_handle, NULL, blocknr, 0); } } } void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) { struct block_head *blkh; blkh = B_BLK_HEAD(bh); set_blkh_level(blkh, FREE_LEVEL); set_blkh_nr_item(blkh, 0); clear_buffer_dirty(bh); store_thrown(tb, bh); } /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, struct buffer_head *src, int n_src) { RFALSE(dest == NULL || src == NULL, "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", src, dest); RFALSE(!B_IS_KEYS_LEVEL(dest), "vs-12310: invalid level (%z) for destination buffer. 
dest must be leaf", dest); RFALSE(n_dest < 0 || n_src < 0, "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); if (B_IS_ITEMS_LEVEL(src)) /* source buffer contains leaf node */ memcpy(internal_key(dest, n_dest), item_head(src, n_src), KEY_SIZE); else memcpy(internal_key(dest, n_dest), internal_key(src, n_src), KEY_SIZE); do_balance_mark_internal_dirty(tb, dest, 0); } int get_left_neighbor_position(struct tree_balance *tb, int h) { int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL, "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h)); if (Sh_position == 0) return B_NR_ITEMS(tb->FL[h]); else return Sh_position - 1; } int get_right_neighbor_position(struct tree_balance *tb, int h) { int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL, "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h))) return 0; else return Sh_position + 1; } #ifdef CONFIG_REISERFS_CHECK int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); static void check_internal_node(struct super_block *s, struct buffer_head *bh, char *mes) { struct disk_child *dc; int i; RFALSE(!bh, "PAP-12336: bh == 0"); if (!bh || !B_IS_IN_TREE(bh)) return; RFALSE(!buffer_dirty(bh) && !(buffer_journaled(bh) || buffer_journal_dirty(bh)), "PAP-12337: buffer (%b) must be dirty", bh); dc = B_N_CHILD(bh, 0); for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { if (!is_reusable(s, dc_block_number(dc), 1)) { print_cur_tb(mes); reiserfs_panic(s, "PAP-12338", "invalid child pointer %y in %b", dc, bh); } } } static int 
locked_or_not_in_tree(struct tree_balance *tb, struct buffer_head *bh, char *which) { if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || !B_IS_IN_TREE(bh)) { reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh); return 1; } return 0; } static int check_before_balancing(struct tree_balance *tb) { int retval = 0; if (REISERFS_SB(tb->tb_sb)->cur_tb) { reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " "occurred based on cur_tb not being null at " "this point in code. do_balance cannot properly " "handle concurrent tree accesses on a same " "mount point."); } /* * double check that buffers that we will modify are unlocked. * (fix_nodes should already have prepped all of these for us). */ if (tb->lnum[0]) { retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]"); check_leaf(tb->L[0]); } if (tb->rnum[0]) { retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]"); retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]"); retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]"); check_leaf(tb->R[0]); } retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path), "S[0]"); check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); return retval; } static void check_after_balance_leaf(struct tree_balance *tb) { if (tb->lnum[0]) { if (B_FREE_SPACE(tb->L[0]) != MAX_CHILD_SIZE(tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position(tb, 0)))) { print_cur_tb("12221"); reiserfs_panic(tb->tb_sb, "PAP-12355", "shift to left was incorrect"); } } if (tb->rnum[0]) { if (B_FREE_SPACE(tb->R[0]) != MAX_CHILD_SIZE(tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position(tb, 0)))) { print_cur_tb("12222"); reiserfs_panic(tb->tb_sb, "PAP-12360", "shift to right was incorrect"); } } if (PATH_H_PBUFFER(tb->tb_path, 1) && (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) != (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - 
dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), PATH_H_POSITION(tb->tb_path, 1)))))) { int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)); int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), PATH_H_POSITION(tb->tb_path, 1)))); print_cur_tb("12223"); reiserfs_warning(tb->tb_sb, "reiserfs-12363", "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", left, MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)), PATH_H_PBUFFER(tb->tb_path, 1), PATH_H_POSITION(tb->tb_path, 1), dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path, 1), PATH_H_POSITION(tb->tb_path, 1))), right); reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect"); } } static void check_leaf_level(struct tree_balance *tb) { check_leaf(tb->L[0]); check_leaf(tb->R[0]); check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); } static void check_internal_levels(struct tree_balance *tb) { int h; /* check all internal nodes */ for (h = 1; tb->insert_size[h]; h++) { check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h), "BAD BUFFER ON PATH"); if (tb->lnum[h]) check_internal_node(tb->tb_sb, tb->L[h], "BAD L"); if (tb->rnum[h]) check_internal_node(tb->tb_sb, tb->R[h], "BAD R"); } } #endif /* * Now we have all of the buffers that must be used in balancing of * the tree. We rely on the assumption that schedule() will not occur * while do_balance works. ( Only interrupt handlers are acceptable.) * We balance the tree according to the analysis made before this, * using buffers already obtained. For SMP support it will someday be * necessary to add ordered locking of tb. */ /* * Some interesting rules of balancing: * we delete a maximum of two nodes per level per balancing: we never * delete R, when we delete two of three nodes L, S, R then we move * them into R. 
* * we only delete L if we are deleting two nodes, if we delete only * one node we delete S * * if we shift leaves then we shift as much as we can: this is a * deliberate policy of extremism in node packing which results in * higher average utilization after repeated random balance operations * at the cost of more memory copies and more balancing as a result of * small insertions to full nodes. * * if we shift internal nodes we try to evenly balance the node * utilization, with consequent less balancing at the cost of lower * utilization. * * one could argue that the policy for directories in leaves should be * that of internal nodes, but we will wait until another day to * evaluate this.... It would be nice to someday measure and prove * these assumptions as to what is optimal.... */ static inline void do_balance_starts(struct tree_balance *tb) { /* use print_cur_tb() to see initial state of struct tree_balance */ /* store_print_tb (tb); */ /* do not delete, just comment it out */ /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, "check"); */ RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); #ifdef CONFIG_REISERFS_CHECK REISERFS_SB(tb->tb_sb)->cur_tb = tb; #endif } static inline void do_balance_completed(struct tree_balance *tb) { #ifdef CONFIG_REISERFS_CHECK check_leaf_level(tb); check_internal_levels(tb); REISERFS_SB(tb->tb_sb)->cur_tb = NULL; #endif /* * reiserfs_free_block is no longer schedule safe. 
So, we need to * put the buffers we want freed on the thrown list during do_balance, * and then free them now */ REISERFS_SB(tb->tb_sb)->s_do_balance++; /* release all nodes hold to perform the balancing */ unfix_nodes(tb); free_thrown(tb); } /* * do_balance - balance the tree * * @tb: tree_balance structure * @ih: item header of inserted item * @body: body of inserted item or bytes to paste * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste * * Cut means delete part of an item (includes removing an entry from a * directory). * * Delete means delete whole item. * * Insert means add a new item into the tree. * * Paste means to append to the end of an existing file or to * insert a directory entry. */ void do_balance(struct tree_balance *tb, struct item_head *ih, const char *body, int flag) { int child_pos; /* position of a child node in its parent */ int h; /* level of the tree being processed */ /* * in our processing of one level we sometimes determine what * must be inserted into the next higher level. This insertion * consists of a key or two keys and their corresponding * pointers */ struct item_head insert_key[2]; /* inserted node-ptrs for the next level */ struct buffer_head *insert_ptr[2]; tb->tb_mode = flag; tb->need_balance_dirty = 0; if (FILESYSTEM_CHANGED_TB(tb)) { reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has " "changed"); } /* if we have no real work to do */ if (!tb->insert_size[0]) { reiserfs_warning(tb->tb_sb, "PAP-12350", "insert_size == 0, mode == %c", flag); unfix_nodes(tb); return; } atomic_inc(&fs_generation(tb->tb_sb)); do_balance_starts(tb); /* * balance_leaf returns 0 except if combining L R and S into * one node. see balance_internal() for explanation of this * line of code. */ child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); #ifdef CONFIG_REISERFS_CHECK check_after_balance_leaf(tb); #endif /* Balance internal level of the tree. 
*/ for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) child_pos = balance_internal(tb, h, child_pos, insert_key, insert_ptr); do_balance_completed(tb); } |
58 19 2 41 1 31 11 18 19 19 5 37 38 27 27 38 5 20 12 15 12 29 17 12 30 30 38 38 38 16 38 15 38 30 30 38 38 18 3 34 16 36 36 36 36 36 19 17 2 17 13 56 2 5 59 3 3 18 43 57 5 3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/time.h> #include <linux/writeback.h> #include <linux/uio.h> #include <linux/random.h> #include <linux/iversion.h> #include "exfat_raw.h" #include "exfat_fs.h" int __exfat_write_inode(struct inode *inode, int sync) { unsigned long long on_disk_size; struct exfat_dentry *ep, *ep2; struct exfat_entry_set_cache es; struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); bool is_dir = (ei->type == TYPE_DIR) ? true : false; struct timespec64 ts; if (inode->i_ino == EXFAT_ROOT_INO) return 0; /* * If the inode is already unlinked, there is no need for updating it. 
*/ if (ei->dir.dir == DIR_DELETED) return 0; if (is_dir && ei->dir.dir == sbi->root_dir && ei->entry == -1) return 0; exfat_set_volume_dirty(sb); /* get the directory entry of given file or directory */ if (exfat_get_dentry_set(&es, sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES)) return -EIO; ep = exfat_get_dentry_cached(&es, ES_IDX_FILE); ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM); ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode)); /* set FILE_INFO structure using the acquired struct exfat_dentry */ exfat_set_entry_time(sbi, &ei->i_crtime, &ep->dentry.file.create_tz, &ep->dentry.file.create_time, &ep->dentry.file.create_date, &ep->dentry.file.create_time_cs); ts = inode_get_mtime(inode); exfat_set_entry_time(sbi, &ts, &ep->dentry.file.modify_tz, &ep->dentry.file.modify_time, &ep->dentry.file.modify_date, &ep->dentry.file.modify_time_cs); ts = inode_get_atime(inode); exfat_set_entry_time(sbi, &ts, &ep->dentry.file.access_tz, &ep->dentry.file.access_time, &ep->dentry.file.access_date, NULL); /* File size should be zero if there is no cluster allocated */ on_disk_size = i_size_read(inode); if (ei->start_clu == EXFAT_EOF_CLUSTER) on_disk_size = 0; ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_size); ep2->dentry.stream.size = ep2->dentry.stream.valid_size; if (on_disk_size) { ep2->dentry.stream.flags = ei->flags; ep2->dentry.stream.start_clu = cpu_to_le32(ei->start_clu); } else { ep2->dentry.stream.flags = ALLOC_FAT_CHAIN; ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER; } exfat_update_dir_chksum_with_entry_set(&es); return exfat_put_dentry_set(&es, sync); } int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); ret = __exfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); return ret; } void exfat_sync_inode(struct inode *inode) { lockdep_assert_held(&EXFAT_SB(inode->i_sb)->s_lock); __exfat_write_inode(inode, 1); } /* * Input: 
inode, (logical) clu_offset, target allocation area * Output: errcode, cluster number * *clu = (~0), if it's unable to allocate a new cluster */ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, unsigned int *clu, int create) { int ret; unsigned int last_clu; struct exfat_chain new_clu; struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); unsigned int local_clu_offset = clu_offset; unsigned int num_to_be_allocated = 0, num_clusters = 0; if (ei->i_size_ondisk > 0) num_clusters = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); if (clu_offset >= num_clusters) num_to_be_allocated = clu_offset - num_clusters + 1; if (!create && (num_to_be_allocated > 0)) { *clu = EXFAT_EOF_CLUSTER; return 0; } *clu = last_clu = ei->start_clu; if (ei->flags == ALLOC_NO_FAT_CHAIN) { if (clu_offset > 0 && *clu != EXFAT_EOF_CLUSTER) { last_clu += clu_offset - 1; if (clu_offset == num_clusters) *clu = EXFAT_EOF_CLUSTER; else *clu += clu_offset; } } else if (ei->type == TYPE_FILE) { unsigned int fclus = 0; int err = exfat_get_cluster(inode, clu_offset, &fclus, clu, &last_clu, 1); if (err) return -EIO; clu_offset -= fclus; } else { /* hint information */ if (clu_offset > 0 && ei->hint_bmap.off != EXFAT_EOF_CLUSTER && ei->hint_bmap.off > 0 && clu_offset >= ei->hint_bmap.off) { clu_offset -= ei->hint_bmap.off; /* hint_bmap.clu should be valid */ WARN_ON(ei->hint_bmap.clu < 2); *clu = ei->hint_bmap.clu; } while (clu_offset > 0 && *clu != EXFAT_EOF_CLUSTER) { last_clu = *clu; if (exfat_get_next_cluster(sb, clu)) return -EIO; clu_offset--; } } if (*clu == EXFAT_EOF_CLUSTER) { exfat_set_volume_dirty(sb); new_clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? 
EXFAT_EOF_CLUSTER : last_clu + 1; new_clu.size = 0; new_clu.flags = ei->flags; /* allocate a cluster */ if (num_to_be_allocated < 1) { /* Broken FAT (i_sze > allocated FAT) */ exfat_fs_error(sb, "broken FAT chain."); return -EIO; } ret = exfat_alloc_cluster(inode, num_to_be_allocated, &new_clu, inode_needs_sync(inode)); if (ret) return ret; if (new_clu.dir == EXFAT_EOF_CLUSTER || new_clu.dir == EXFAT_FREE_CLUSTER) { exfat_fs_error(sb, "bogus cluster new allocated (last_clu : %u, new_clu : %u)", last_clu, new_clu.dir); return -EIO; } /* append to the FAT chain */ if (last_clu == EXFAT_EOF_CLUSTER) { if (new_clu.flags == ALLOC_FAT_CHAIN) ei->flags = ALLOC_FAT_CHAIN; ei->start_clu = new_clu.dir; } else { if (new_clu.flags != ei->flags) { /* no-fat-chain bit is disabled, * so fat-chain should be synced with * alloc-bitmap */ exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters); ei->flags = ALLOC_FAT_CHAIN; } if (new_clu.flags == ALLOC_FAT_CHAIN) if (exfat_ent_set(sb, last_clu, new_clu.dir)) return -EIO; } num_clusters += num_to_be_allocated; *clu = new_clu.dir; inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9; /* * Move *clu pointer along FAT chains (hole care) because the * caller of this function expect *clu to be the last cluster. * This only works when num_to_be_allocated >= 2, * *clu = (the first cluster of the allocated chain) => * (the last cluster of ...) */ if (ei->flags == ALLOC_NO_FAT_CHAIN) { *clu += num_to_be_allocated - 1; } else { while (num_to_be_allocated > 1) { if (exfat_get_next_cluster(sb, clu)) return -EIO; num_to_be_allocated--; } } } /* hint information */ ei->hint_bmap.off = local_clu_offset; ei->hint_bmap.clu = *clu; return 0; } static int exfat_map_new_buffer(struct exfat_inode_info *ei, struct buffer_head *bh, loff_t pos) { if (buffer_delay(bh) && pos > ei->i_size_aligned) return -EIO; set_buffer_new(bh); /* * Adjust i_size_aligned if i_size_ondisk is bigger than it. 
*/ if (ei->i_size_ondisk > ei->i_size_aligned) ei->i_size_aligned = ei->i_size_ondisk; return 0; } static int exfat_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct exfat_inode_info *ei = EXFAT_I(inode); struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; int err = 0; unsigned long mapped_blocks = 0; unsigned int cluster, sec_offset; sector_t last_block; sector_t phys = 0; loff_t pos; mutex_lock(&sbi->s_lock); last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb); if (iblock >= last_block && !create) goto done; /* Is this block already allocated? */ err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits, &cluster, create); if (err) { if (err != -ENOSPC) exfat_fs_error_ratelimit(sb, "failed to bmap (inode : %p iblock : %llu, err : %d)", inode, (unsigned long long)iblock, err); goto unlock_ret; } if (cluster == EXFAT_EOF_CLUSTER) goto done; /* sector offset in cluster */ sec_offset = iblock & (sbi->sect_per_clus - 1); phys = exfat_cluster_to_sector(sbi, cluster) + sec_offset; mapped_blocks = sbi->sect_per_clus - sec_offset; max_blocks = min(mapped_blocks, max_blocks); /* Treat newly added block / cluster */ if (iblock < last_block) create = 0; if (create || buffer_delay(bh_result)) { pos = EXFAT_BLK_TO_B((iblock + 1), sb); if (ei->i_size_ondisk < pos) ei->i_size_ondisk = pos; } if (create) { err = exfat_map_new_buffer(ei, bh_result, pos); if (err) { exfat_fs_error(sb, "requested for bmap out of range(pos : (%llu) > i_size_aligned(%llu)\n", pos, ei->i_size_aligned); goto unlock_ret; } } if (buffer_delay(bh_result)) clear_buffer_delay(bh_result); map_bh(bh_result, sb, phys); done: bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb); unlock_ret: mutex_unlock(&sbi->s_lock); return err; } static int exfat_read_folio(struct file *file, struct folio *folio) { return mpage_read_folio(folio, exfat_get_block); } static 
void exfat_readahead(struct readahead_control *rac) { mpage_readahead(rac, exfat_get_block); } static int exfat_writepages(struct address_space *mapping, struct writeback_control *wbc) { return mpage_writepages(mapping, wbc, exfat_get_block); } static void exfat_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; if (to > i_size_read(inode)) { truncate_pagecache(inode, i_size_read(inode)); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); exfat_truncate(inode); } } static int exfat_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, struct page **pagep, void **fsdata) { int ret; *pagep = NULL; ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata, exfat_get_block, &EXFAT_I(mapping->host)->i_size_ondisk); if (ret < 0) exfat_write_failed(mapping, pos+len); return ret; } static int exfat_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int copied, struct page *pagep, void *fsdata) { struct inode *inode = mapping->host; struct exfat_inode_info *ei = EXFAT_I(inode); int err; err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); if (ei->i_size_aligned < i_size_read(inode)) { exfat_fs_error(inode->i_sb, "invalid size(size(%llu) > aligned(%llu)\n", i_size_read(inode), ei->i_size_aligned); return -EIO; } if (err < len) exfat_write_failed(mapping, pos+len); if (!(err < 0) && !(ei->attr & EXFAT_ATTR_ARCHIVE)) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ei->attr |= EXFAT_ATTR_ARCHIVE; mark_inode_dirty(inode); } return err; } static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; loff_t size = iocb->ki_pos + iov_iter_count(iter); int rw = iov_iter_rw(iter); ssize_t ret; if (rw == WRITE) { /* * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the 
->i_size_aligned to block boundary. * * But we must fill the remaining area or hole by nul for * updating ->i_size_aligned * * Return 0, and fallback to normal buffered write. */ if (EXFAT_I(inode)->i_size_aligned < size) return 0; } /* * Need to use the DIO_LOCKING for avoiding the race * condition of exfat_get_block() and ->truncate(). */ ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); if (ret < 0 && (rw & WRITE)) exfat_write_failed(mapping, size); return ret; } static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block) { sector_t blocknr; /* exfat_get_cluster() assumes the requested blocknr isn't truncated. */ down_read(&EXFAT_I(mapping->host)->truncate_lock); blocknr = generic_block_bmap(mapping, block, exfat_get_block); up_read(&EXFAT_I(mapping->host)->truncate_lock); return blocknr; } /* * exfat_block_truncate_page() zeroes out a mapping from file offset `from' * up to the end of the block which corresponds to `from'. * This is required during truncate to physically zeroout the tail end * of that block so it doesn't yield old data if the file is later grown. 
* Also, avoid causing failure from fsx for cases of "data past EOF" */ int exfat_block_truncate_page(struct inode *inode, loff_t from) { return block_truncate_page(inode->i_mapping, from, exfat_get_block); } static const struct address_space_operations exfat_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = exfat_read_folio, .readahead = exfat_readahead, .writepages = exfat_writepages, .write_begin = exfat_write_begin, .write_end = exfat_write_end, .direct_IO = exfat_direct_IO, .bmap = exfat_aop_bmap, .migrate_folio = buffer_migrate_folio, }; static inline unsigned long exfat_hash(loff_t i_pos) { return hash_32(i_pos, EXFAT_HASH_BITS); } void exfat_hash_inode(struct inode *inode, loff_t i_pos) { struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); spin_lock(&sbi->inode_hash_lock); EXFAT_I(inode)->i_pos = i_pos; hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head); spin_unlock(&sbi->inode_hash_lock); } void exfat_unhash_inode(struct inode *inode) { struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); spin_lock(&sbi->inode_hash_lock); hlist_del_init(&EXFAT_I(inode)->i_hash_fat); EXFAT_I(inode)->i_pos = 0; spin_unlock(&sbi->inode_hash_lock); } struct inode *exfat_iget(struct super_block *sb, loff_t i_pos) { struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *info; struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); struct inode *inode = NULL; spin_lock(&sbi->inode_hash_lock); hlist_for_each_entry(info, head, i_hash_fat) { WARN_ON(info->vfs_inode.i_sb != sb); if (i_pos != info->i_pos) continue; inode = igrab(&info->vfs_inode); if (inode) break; } spin_unlock(&sbi->inode_hash_lock); return inode; } /* doesn't deal with root inode */ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) { struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); struct exfat_inode_info *ei = EXFAT_I(inode); loff_t size = 
info->size; ei->dir = info->dir; ei->entry = info->entry; ei->attr = info->attr; ei->start_clu = info->start_clu; ei->flags = info->flags; ei->type = info->type; ei->version = 0; ei->hint_stat.eidx = 0; ei->hint_stat.clu = info->start_clu; ei->hint_femp.eidx = EXFAT_HINT_NONE; ei->hint_bmap.off = EXFAT_EOF_CLUSTER; ei->i_pos = 0; inode->i_uid = sbi->options.fs_uid; inode->i_gid = sbi->options.fs_gid; inode_inc_iversion(inode); inode->i_generation = get_random_u32(); if (info->attr & EXFAT_ATTR_SUBDIR) { /* directory */ inode->i_generation &= ~1; inode->i_mode = exfat_make_mode(sbi, info->attr, 0777); inode->i_op = &exfat_dir_inode_operations; inode->i_fop = &exfat_dir_operations; set_nlink(inode, info->num_subdirs); } else { /* regular file */ inode->i_generation |= 1; inode->i_mode = exfat_make_mode(sbi, info->attr, 0777); inode->i_op = &exfat_file_inode_operations; inode->i_fop = &exfat_file_operations; inode->i_mapping->a_ops = &exfat_aops; inode->i_mapping->nrpages = 0; } i_size_write(inode, size); /* ondisk and aligned size should be aligned with block size */ if (size & (inode->i_sb->s_blocksize - 1)) { size |= (inode->i_sb->s_blocksize - 1); size++; } ei->i_size_aligned = size; ei->i_size_ondisk = size; exfat_save_attr(inode, info->attr); inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; inode_set_mtime_to_ts(inode, info->mtime); inode_set_ctime_to_ts(inode, info->mtime); ei->i_crtime = info->crtime; inode_set_atime_to_ts(inode, info->atime); return 0; } struct inode *exfat_build_inode(struct super_block *sb, struct exfat_dir_entry *info, loff_t i_pos) { struct inode *inode; int err; inode = exfat_iget(sb, i_pos); if (inode) goto out; inode = new_inode(sb); if (!inode) { inode = ERR_PTR(-ENOMEM); goto out; } inode->i_ino = iunique(sb, EXFAT_ROOT_INO); inode_set_iversion(inode, 1); err = exfat_fill_inode(inode, info); if (err) { iput(inode); inode = ERR_PTR(err); goto out; } exfat_hash_inode(inode, i_pos); insert_inode_hash(inode); out: 
return inode; } void exfat_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); if (!inode->i_nlink) { i_size_write(inode, 0); mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); __exfat_truncate(inode); mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); } invalidate_inode_buffers(inode); clear_inode(inode); exfat_cache_inval_inode(inode); exfat_unhash_inode(inode); } |
1 1 1 1 1 1 1 1 1 1 4 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Bluetooth HCI UART driver * * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com> * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org> */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/types.h> #include 
<linux/fcntl.h> #include <linux/interrupt.h> #include <linux/ptrace.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/signal.h> #include <linux/ioctl.h> #include <linux/skbuff.h> #include <linux/firmware.h> #include <linux/serdev.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include "btintel.h" #include "btbcm.h" #include "hci_uart.h" #define VERSION "2.3" static const struct hci_uart_proto *hup[HCI_UART_MAX_PROTO]; int hci_uart_register_proto(const struct hci_uart_proto *p) { if (p->id >= HCI_UART_MAX_PROTO) return -EINVAL; if (hup[p->id]) return -EEXIST; hup[p->id] = p; BT_INFO("HCI UART protocol %s registered", p->name); return 0; } int hci_uart_unregister_proto(const struct hci_uart_proto *p) { if (p->id >= HCI_UART_MAX_PROTO) return -EINVAL; if (!hup[p->id]) return -EINVAL; hup[p->id] = NULL; return 0; } static const struct hci_uart_proto *hci_uart_get_proto(unsigned int id) { if (id >= HCI_UART_MAX_PROTO) return NULL; return hup[id]; } static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type) { struct hci_dev *hdev = hu->hdev; /* Update HCI stat counters */ switch (pkt_type) { case HCI_COMMAND_PKT: hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: hdev->stat.acl_tx++; break; case HCI_SCODATA_PKT: hdev->stat.sco_tx++; break; } } static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) { struct sk_buff *skb = hu->tx_skb; if (!skb) { percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) skb = hu->proto->dequeue(hu); percpu_up_read(&hu->proto_lock); } else { hu->tx_skb = NULL; } return skb; } int hci_uart_tx_wakeup(struct hci_uart *hu) { /* This may be called in an IRQ context, so we can't sleep. Therefore * we try to acquire the lock only, and if that fails we assume the * tty is being closed because that is the only time the write lock is * acquired. 
If, however, at some point in the future the write lock * is also acquired in other situations, then this must be revisited. */ if (!percpu_down_read_trylock(&hu->proto_lock)) return 0; if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) goto no_schedule; set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) goto no_schedule; BT_DBG(""); schedule_work(&hu->write_work); no_schedule: percpu_up_read(&hu->proto_lock); return 0; } EXPORT_SYMBOL_GPL(hci_uart_tx_wakeup); static void hci_uart_write_work(struct work_struct *work) { struct hci_uart *hu = container_of(work, struct hci_uart, write_work); struct tty_struct *tty = hu->tty; struct hci_dev *hdev = hu->hdev; struct sk_buff *skb; /* REVISIT: should we cope with bad skbs or ->write() returning * and error value ? */ restart: clear_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); while ((skb = hci_uart_dequeue(hu))) { int len; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); len = tty->ops->write(tty, skb->data, skb->len); hdev->stat.byte_tx += len; skb_pull(skb, len); if (skb->len) { hu->tx_skb = skb; break; } hci_uart_tx_complete(hu, hci_skb_pkt_type(skb)); kfree_skb(skb); } clear_bit(HCI_UART_SENDING, &hu->tx_state); if (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)) goto restart; wake_up_bit(&hu->tx_state, HCI_UART_SENDING); } void hci_uart_init_work(struct work_struct *work) { struct hci_uart *hu = container_of(work, struct hci_uart, init_ready); int err; struct hci_dev *hdev; if (!test_and_clear_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return; err = hci_register_dev(hu->hdev); if (err < 0) { BT_ERR("Can't register HCI device"); clear_bit(HCI_UART_PROTO_READY, &hu->flags); hu->proto->close(hu); hdev = hu->hdev; hu->hdev = NULL; hci_free_dev(hdev); return; } set_bit(HCI_UART_REGISTERED, &hu->flags); } int hci_uart_init_ready(struct hci_uart *hu) { if (!test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return -EALREADY; schedule_work(&hu->init_ready); return 0; } int 
hci_uart_wait_until_sent(struct hci_uart *hu) { return wait_on_bit_timeout(&hu->tx_state, HCI_UART_SENDING, TASK_INTERRUPTIBLE, msecs_to_jiffies(2000)); } /* ------- Interface to HCI layer ------ */ /* Reset device */ static int hci_uart_flush(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct tty_struct *tty = hu->tty; BT_DBG("hdev %p tty %p", hdev, tty); if (hu->tx_skb) { kfree_skb(hu->tx_skb); hu->tx_skb = NULL; } /* Flush any pending characters in the driver and discipline. */ tty_ldisc_flush(tty); tty_driver_flush_buffer(tty); percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) hu->proto->flush(hu); percpu_up_read(&hu->proto_lock); return 0; } /* Initialize device */ static int hci_uart_open(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); /* Undo clearing this from hci_uart_close() */ hdev->flush = hci_uart_flush; return 0; } /* Close device */ static int hci_uart_close(struct hci_dev *hdev) { BT_DBG("hdev %p", hdev); hci_uart_flush(hdev); hdev->flush = NULL; return 0; } /* Send frames from HCI layer */ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_uart *hu = hci_get_drvdata(hdev); BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb), skb->len); percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_up_read(&hu->proto_lock); return -EUNATCH; } hu->proto->enqueue(hu, skb); percpu_up_read(&hu->proto_lock); hci_uart_tx_wakeup(hu); return 0; } /* Check the underlying device or tty has flow control support */ bool hci_uart_has_flow_control(struct hci_uart *hu) { /* serdev nodes check if the needed operations are present */ if (hu->serdev) return true; if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) return true; return false; } /* Flow control or un-flow control the device */ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) { struct tty_struct *tty = hu->tty; struct ktermios 
ktermios; int status; unsigned int set = 0; unsigned int clear = 0; if (hu->serdev) { serdev_device_set_flow_control(hu->serdev, !enable); serdev_device_set_rts(hu->serdev, !enable); return; } if (enable) { /* Disable hardware flow control */ ktermios = tty->termios; ktermios.c_cflag &= ~CRTSCTS; tty_set_termios(tty, &ktermios); BT_DBG("Disabling hardware flow control: %s", (tty->termios.c_cflag & CRTSCTS) ? "failed" : "success"); /* Clear RTS to prevent the device from sending */ /* Most UARTs need OUT2 to enable interrupts */ status = tty->driver->ops->tiocmget(tty); BT_DBG("Current tiocm 0x%x", status); set &= ~(TIOCM_OUT2 | TIOCM_RTS); clear = ~set; set &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; status = tty->driver->ops->tiocmset(tty, set, clear); BT_DBG("Clearing RTS: %s", status ? "failed" : "success"); } else { /* Set RTS to allow the device to send again */ status = tty->driver->ops->tiocmget(tty); BT_DBG("Current tiocm 0x%x", status); set |= (TIOCM_OUT2 | TIOCM_RTS); clear = ~set; set &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | TIOCM_OUT2 | TIOCM_LOOP; status = tty->driver->ops->tiocmset(tty, set, clear); BT_DBG("Setting RTS: %s", status ? "failed" : "success"); /* Re-enable hardware flow control */ ktermios = tty->termios; ktermios.c_cflag |= CRTSCTS; tty_set_termios(tty, &ktermios); BT_DBG("Enabling hardware flow control: %s", !(tty->termios.c_cflag & CRTSCTS) ? 
"failed" : "success"); } } void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, unsigned int oper_speed) { hu->init_speed = init_speed; hu->oper_speed = oper_speed; } void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed) { struct tty_struct *tty = hu->tty; struct ktermios ktermios; ktermios = tty->termios; ktermios.c_cflag &= ~CBAUD; tty_termios_encode_baud_rate(&ktermios, speed, speed); /* tty_set_termios() return not checked as it is always 0 */ tty_set_termios(tty, &ktermios); BT_DBG("%s: New tty speeds: %d/%d", hu->hdev->name, tty->termios.c_ispeed, tty->termios.c_ospeed); } static int hci_uart_setup(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct hci_rp_read_local_version *ver; struct sk_buff *skb; unsigned int speed; int err; /* Init speed if any */ if (hu->init_speed) speed = hu->init_speed; else if (hu->proto->init_speed) speed = hu->proto->init_speed; else speed = 0; if (speed) hci_uart_set_baudrate(hu, speed); /* Operational speed if any */ if (hu->oper_speed) speed = hu->oper_speed; else if (hu->proto->oper_speed) speed = hu->proto->oper_speed; else speed = 0; if (hu->proto->set_baudrate && speed) { err = hu->proto->set_baudrate(hu, speed); if (!err) hci_uart_set_baudrate(hu, speed); } if (hu->proto->setup) return hu->proto->setup(hu); if (!test_bit(HCI_UART_VND_DETECT, &hu->hdev_flags)) return 0; skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { BT_ERR("%s: Reading local version information failed (%ld)", hdev->name, PTR_ERR(skb)); return 0; } if (skb->len != sizeof(*ver)) { BT_ERR("%s: Event length mismatch for version information", hdev->name); goto done; } ver = (struct hci_rp_read_local_version *)skb->data; switch (le16_to_cpu(ver->manufacturer)) { #ifdef CONFIG_BT_HCIUART_INTEL case 2: hdev->set_bdaddr = btintel_set_bdaddr; btintel_check_bdaddr(hdev); break; #endif #ifdef CONFIG_BT_HCIUART_BCM case 15: hdev->set_bdaddr = btbcm_set_bdaddr; 
btbcm_check_bdaddr(hdev); break; #endif default: break; } done: kfree_skb(skb); return 0; } /* ------ LDISC part ------ */ /* hci_uart_tty_open * * Called when line discipline changed to HCI_UART. * * Arguments: * tty pointer to tty info structure * Return Value: * 0 if success, otherwise error code */ static int hci_uart_tty_open(struct tty_struct *tty) { struct hci_uart *hu; BT_DBG("tty %p", tty); if (!capable(CAP_NET_ADMIN)) return -EPERM; /* Error if the tty has no write op instead of leaving an exploitable * hole */ if (tty->ops->write == NULL) return -EOPNOTSUPP; hu = kzalloc(sizeof(struct hci_uart), GFP_KERNEL); if (!hu) { BT_ERR("Can't allocate control structure"); return -ENFILE; } if (percpu_init_rwsem(&hu->proto_lock)) { BT_ERR("Can't allocate semaphore structure"); kfree(hu); return -ENOMEM; } tty->disc_data = hu; hu->tty = tty; tty->receive_room = 65536; /* disable alignment support by default */ hu->alignment = 1; hu->padding = 0; INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); /* Flush any pending characters in the driver */ tty_driver_flush_buffer(tty); return 0; } /* hci_uart_tty_close() * * Called when the line discipline is changed to something * else, the tty is closed, or the tty detects a hangup. 
*/ static void hci_uart_tty_close(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; struct hci_dev *hdev; BT_DBG("tty %p", tty); /* Detach from the tty */ tty->disc_data = NULL; if (!hu) return; hdev = hu->hdev; if (hdev) hci_uart_close(hdev); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); percpu_up_write(&hu->proto_lock); cancel_work_sync(&hu->init_ready); cancel_work_sync(&hu->write_work); if (hdev) { if (test_bit(HCI_UART_REGISTERED, &hu->flags)) hci_unregister_dev(hdev); hci_free_dev(hdev); } hu->proto->close(hu); } clear_bit(HCI_UART_PROTO_SET, &hu->flags); percpu_free_rwsem(&hu->proto_lock); kfree(hu); } /* hci_uart_tty_wakeup() * * Callback for transmit wakeup. Called when low level * device driver can accept more send data. * * Arguments: tty pointer to associated tty instance data * Return Value: None */ static void hci_uart_tty_wakeup(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; BT_DBG(""); if (!hu) return; clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); if (tty != hu->tty) return; if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) hci_uart_tx_wakeup(hu); } /* hci_uart_tty_receive() * * Called by tty low level driver when receive data is * available. 
* * Arguments: tty pointer to tty isntance data * data pointer to received data * flags pointer to flags for data * count count of received data in bytes * * Return Value: None */ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, const u8 *flags, size_t count) { struct hci_uart *hu = tty->disc_data; if (!hu || tty != hu->tty) return; percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { percpu_up_read(&hu->proto_lock); return; } /* It does not need a lock here as it is already protected by a mutex in * tty caller */ hu->proto->recv(hu, data, count); percpu_up_read(&hu->proto_lock); if (hu->hdev) hu->hdev->stat.byte_rx += count; tty_unthrottle(tty); } static int hci_uart_register_dev(struct hci_uart *hu) { struct hci_dev *hdev; int err; BT_DBG(""); /* Initialize and register HCI device */ hdev = hci_alloc_dev(); if (!hdev) { BT_ERR("Can't allocate HCI device"); return -ENOMEM; } hu->hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, hu); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. This avoids filling in the * value for Ericsson when nothing is specified. 
*/ if (hu->proto->setup) hdev->manufacturer = hu->proto->manufacturer; hdev->open = hci_uart_open; hdev->close = hci_uart_close; hdev->flush = hci_uart_flush; hdev->send = hci_uart_send_frame; hdev->setup = hci_uart_setup; SET_HCIDEV_DEV(hdev, hu->tty->dev); if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags)) hdev->dev_type = HCI_AMP; else hdev->dev_type = HCI_PRIMARY; /* Only call open() for the protocol after hdev is fully initialized as * open() (or a timer/workqueue it starts) may attempt to reference it. */ err = hu->proto->open(hu); if (err) { hu->hdev = NULL; hci_free_dev(hdev); return err; } if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hu->proto->close(hu); hu->hdev = NULL; hci_free_dev(hdev); return -ENODEV; } set_bit(HCI_UART_REGISTERED, &hu->flags); return 0; } static int hci_uart_set_proto(struct hci_uart *hu, int id) { const struct hci_uart_proto *p; int err; p = hci_uart_get_proto(id); if (!p) return -EPROTONOSUPPORT; hu->proto = p; err = hci_uart_register_dev(hu); if (err) { return err; } set_bit(HCI_UART_PROTO_READY, &hu->flags); return 0; } static int hci_uart_set_flags(struct hci_uart *hu, unsigned long flags) { unsigned long valid_flags = BIT(HCI_UART_RAW_DEVICE) | BIT(HCI_UART_RESET_ON_INIT) | BIT(HCI_UART_CREATE_AMP) | BIT(HCI_UART_INIT_PENDING) | BIT(HCI_UART_EXT_CONFIG) | BIT(HCI_UART_VND_DETECT); if (flags & ~valid_flags) return -EINVAL; hu->hdev_flags = flags; return 0; } /* hci_uart_tty_ioctl() * * Process IOCTL system call for the tty device. 
* * Arguments: * * tty pointer to tty instance data * cmd IOCTL command code * arg argument for IOCTL call (cmd dependent) * * Return Value: Command dependent */ static int hci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct hci_uart *hu = tty->disc_data; int err = 0; BT_DBG(""); /* Verify the status of the device */ if (!hu) return -EBADF; switch (cmd) { case HCIUARTSETPROTO: if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) { err = hci_uart_set_proto(hu, arg); if (err) clear_bit(HCI_UART_PROTO_SET, &hu->flags); } else err = -EBUSY; break; case HCIUARTGETPROTO: if (test_bit(HCI_UART_PROTO_SET, &hu->flags) && test_bit(HCI_UART_PROTO_READY, &hu->flags)) err = hu->proto->id; else err = -EUNATCH; break; case HCIUARTGETDEVICE: if (test_bit(HCI_UART_REGISTERED, &hu->flags)) err = hu->hdev->id; else err = -EUNATCH; break; case HCIUARTSETFLAGS: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) err = -EBUSY; else err = hci_uart_set_flags(hu, arg); break; case HCIUARTGETFLAGS: err = hu->hdev_flags; break; default: err = n_tty_ioctl_helper(tty, cmd, arg); break; } return err; } /* * We don't provide read/write/poll interface for user space. 
*/ static ssize_t hci_uart_tty_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t nr, void **cookie, unsigned long offset) { return 0; } static ssize_t hci_uart_tty_write(struct tty_struct *tty, struct file *file, const u8 *data, size_t count) { return 0; } static struct tty_ldisc_ops hci_uart_ldisc = { .owner = THIS_MODULE, .num = N_HCI, .name = "n_hci", .open = hci_uart_tty_open, .close = hci_uart_tty_close, .read = hci_uart_tty_read, .write = hci_uart_tty_write, .ioctl = hci_uart_tty_ioctl, .compat_ioctl = hci_uart_tty_ioctl, .receive_buf = hci_uart_tty_receive, .write_wakeup = hci_uart_tty_wakeup, }; static int __init hci_uart_init(void) { int err; BT_INFO("HCI UART driver ver %s", VERSION); /* Register the tty discipline */ err = tty_register_ldisc(&hci_uart_ldisc); if (err) { BT_ERR("HCI line discipline registration failed. (%d)", err); return err; } #ifdef CONFIG_BT_HCIUART_H4 h4_init(); #endif #ifdef CONFIG_BT_HCIUART_BCSP bcsp_init(); #endif #ifdef CONFIG_BT_HCIUART_LL ll_init(); #endif #ifdef CONFIG_BT_HCIUART_ATH3K ath_init(); #endif #ifdef CONFIG_BT_HCIUART_3WIRE h5_init(); #endif #ifdef CONFIG_BT_HCIUART_INTEL intel_init(); #endif #ifdef CONFIG_BT_HCIUART_BCM bcm_init(); #endif #ifdef CONFIG_BT_HCIUART_QCA qca_init(); #endif #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_init(); #endif #ifdef CONFIG_BT_HCIUART_MRVL mrvl_init(); #endif return 0; } static void __exit hci_uart_exit(void) { #ifdef CONFIG_BT_HCIUART_H4 h4_deinit(); #endif #ifdef CONFIG_BT_HCIUART_BCSP bcsp_deinit(); #endif #ifdef CONFIG_BT_HCIUART_LL ll_deinit(); #endif #ifdef CONFIG_BT_HCIUART_ATH3K ath_deinit(); #endif #ifdef CONFIG_BT_HCIUART_3WIRE h5_deinit(); #endif #ifdef CONFIG_BT_HCIUART_INTEL intel_deinit(); #endif #ifdef CONFIG_BT_HCIUART_BCM bcm_deinit(); #endif #ifdef CONFIG_BT_HCIUART_QCA qca_deinit(); #endif #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_deinit(); #endif #ifdef CONFIG_BT_HCIUART_MRVL mrvl_deinit(); #endif tty_unregister_ldisc(&hci_uart_ldisc); } 
/* Module entry/exit hooks and metadata for the HCI UART line discipline. */
module_init(hci_uart_init);
module_exit(hci_uart_exit);

MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
MODULE_DESCRIPTION("Bluetooth HCI UART driver ver " VERSION);
MODULE_VERSION(VERSION);
MODULE_LICENSE("GPL");
/* Declare the module alias for the N_HCI line discipline number. */
MODULE_ALIAS_LDISC(N_HCI);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_SYNPROXY_H #define _NF_CONNTRACK_SYNPROXY_H #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netns/generic.h> struct nf_conn_synproxy { u32 isn; u32 its; u32 tsoff; }; static inline struct nf_conn_synproxy *nfct_synproxy(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) return nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY); #else return NULL; #endif } static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) return nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY, GFP_ATOMIC); #else return NULL; #endif } static inline bool nf_ct_add_synproxy(struct nf_conn *ct, const struct nf_conn *tmpl) { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) if (tmpl && nfct_synproxy(tmpl)) { if (!nfct_seqadj_ext_add(ct)) return false; if (!nfct_synproxy_ext_add(ct)) return false; } #endif return true; } #endif /* _NF_CONNTRACK_SYNPROXY_H */ |
676 493 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM signal #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SIGNAL_H #include <linux/signal.h> #include <linux/sched.h> #include <linux/tracepoint.h> #define TP_STORE_SIGINFO(__entry, info) \ do { \ if (info == SEND_SIG_NOINFO) { \ __entry->errno = 0; \ __entry->code = SI_USER; \ } else if (info == SEND_SIG_PRIV) { \ __entry->errno = 0; \ __entry->code = SI_KERNEL; \ } else { \ __entry->errno = info->si_errno; \ __entry->code = info->si_code; \ } \ } while (0) #ifndef TRACE_HEADER_MULTI_READ enum { TRACE_SIGNAL_DELIVERED, TRACE_SIGNAL_IGNORED, TRACE_SIGNAL_ALREADY_PENDING, TRACE_SIGNAL_OVERFLOW_FAIL, TRACE_SIGNAL_LOSE_INFO, }; #endif /** * signal_generate - called when a signal is generated * @sig: signal number * @info: pointer to struct siginfo * @task: pointer to struct task_struct * @group: shared or private * @result: TRACE_SIGNAL_* * * Current process sends a 'sig' signal to 'task' process with * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, * 'info' is not a pointer and you can't access its field. Instead, * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV * means that si_code is SI_KERNEL. 
*/ TRACE_EVENT(signal_generate, TP_PROTO(int sig, struct kernel_siginfo *info, struct task_struct *task, int group, int result), TP_ARGS(sig, info, task, group, result), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, group ) __field( int, result ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->pid = task->pid; __entry->group = group; __entry->result = result; ), TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", __entry->sig, __entry->errno, __entry->code, __entry->comm, __entry->pid, __entry->group, __entry->result) ); /** * signal_deliver - called when a signal is delivered * @sig: signal number * @info: pointer to struct siginfo * @ka: pointer to struct k_sigaction * * A 'sig' signal is delivered to current process with 'info' siginfo, * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or * SIG_DFL. * Note that some signals reported by signal_generate tracepoint can be * lost, ignored or modified (by debugger) before hitting this tracepoint. * This means, this can show which signals are actually delivered, but * matching generated signals and delivered signals may not be correct. 
*/ TRACE_EVENT(signal_deliver, TP_PROTO(int sig, struct kernel_siginfo *info, struct k_sigaction *ka), TP_ARGS(sig, info, ka), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __field( unsigned long, sa_handler ) __field( unsigned long, sa_flags ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); __entry->sa_handler = (unsigned long)ka->sa.sa_handler; __entry->sa_flags = ka->sa.sa_flags; ), TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2017 Netronome Systems, Inc. * Copyright (C) 2019 Mellanox Technologies. All rights reserved */ #include <linux/completion.h> #include <linux/device.h> #include <linux/idr.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/slab.h> #include <linux/sysfs.h> #include "netdevsim.h" static DEFINE_IDA(nsim_bus_dev_ids); static LIST_HEAD(nsim_bus_dev_list); static DEFINE_MUTEX(nsim_bus_dev_list_lock); static bool nsim_bus_enable; static refcount_t nsim_bus_devs; /* Including the bus itself. 
*/ static DECLARE_COMPLETION(nsim_bus_devs_released); static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) { return container_of(dev, struct nsim_bus_dev, dev); } static ssize_t nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); unsigned int num_vfs; int ret; ret = kstrtouint(buf, 0, &num_vfs); if (ret) return ret; device_lock(dev); ret = -ENOENT; if (dev_get_drvdata(dev)) ret = nsim_drv_configure_vfs(nsim_bus_dev, num_vfs); device_unlock(dev); return ret ? ret : count; } static ssize_t nsim_bus_dev_numvfs_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); return sprintf(buf, "%u\n", nsim_bus_dev->num_vfs); } static struct device_attribute nsim_bus_dev_numvfs_attr = __ATTR(sriov_numvfs, 0664, nsim_bus_dev_numvfs_show, nsim_bus_dev_numvfs_store); static ssize_t new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); unsigned int port_index; int ret; /* Prevent to use nsim_bus_dev before initialization. */ if (!smp_load_acquire(&nsim_bus_dev->init)) return -EBUSY; ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; ret = nsim_drv_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); return ret ? ret : count; } static struct device_attribute nsim_bus_dev_new_port_attr = __ATTR_WO(new_port); static ssize_t del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); unsigned int port_index; int ret; /* Prevent to use nsim_bus_dev before initialization. */ if (!smp_load_acquire(&nsim_bus_dev->init)) return -EBUSY; ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; ret = nsim_drv_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); return ret ? 
ret : count; } static struct device_attribute nsim_bus_dev_del_port_attr = __ATTR_WO(del_port); static struct attribute *nsim_bus_dev_attrs[] = { &nsim_bus_dev_numvfs_attr.attr, &nsim_bus_dev_new_port_attr.attr, &nsim_bus_dev_del_port_attr.attr, NULL, }; static const struct attribute_group nsim_bus_dev_attr_group = { .attrs = nsim_bus_dev_attrs, }; static const struct attribute_group *nsim_bus_dev_attr_groups[] = { &nsim_bus_dev_attr_group, NULL, }; static void nsim_bus_dev_release(struct device *dev) { struct nsim_bus_dev *nsim_bus_dev; nsim_bus_dev = container_of(dev, struct nsim_bus_dev, dev); kfree(nsim_bus_dev); if (refcount_dec_and_test(&nsim_bus_devs)) complete(&nsim_bus_devs_released); } static struct device_type nsim_bus_dev_type = { .groups = nsim_bus_dev_attr_groups, .release = nsim_bus_dev_release, }; static struct nsim_bus_dev * nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues); static ssize_t new_device_store(const struct bus_type *bus, const char *buf, size_t count) { unsigned int id, port_count, num_queues; struct nsim_bus_dev *nsim_bus_dev; int err; err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues); switch (err) { case 1: port_count = 1; fallthrough; case 2: num_queues = 1; fallthrough; case 3: if (id > INT_MAX) { pr_err("Value of \"id\" is too big.\n"); return -EINVAL; } break; default: pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n"); return -EINVAL; } mutex_lock(&nsim_bus_dev_list_lock); /* Prevent to use resource before initialization. 
*/ if (!smp_load_acquire(&nsim_bus_enable)) { err = -EBUSY; goto err; } nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues); if (IS_ERR(nsim_bus_dev)) { err = PTR_ERR(nsim_bus_dev); goto err; } refcount_inc(&nsim_bus_devs); /* Allow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, true); list_add_tail(&nsim_bus_dev->list, &nsim_bus_dev_list); mutex_unlock(&nsim_bus_dev_list_lock); return count; err: mutex_unlock(&nsim_bus_dev_list_lock); return err; } static BUS_ATTR_WO(new_device); static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev); static ssize_t del_device_store(const struct bus_type *bus, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev, *tmp; unsigned int id; int err; err = sscanf(buf, "%u", &id); switch (err) { case 1: if (id > INT_MAX) { pr_err("Value of \"id\" is too big.\n"); return -EINVAL; } break; default: pr_err("Format for deleting device is \"id\" (uint).\n"); return -EINVAL; } err = -ENOENT; mutex_lock(&nsim_bus_dev_list_lock); /* Prevent to use resource before initialization. */ if (!smp_load_acquire(&nsim_bus_enable)) { mutex_unlock(&nsim_bus_dev_list_lock); return -EBUSY; } list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { if (nsim_bus_dev->dev.id != id) continue; list_del(&nsim_bus_dev->list); nsim_bus_dev_del(nsim_bus_dev); err = 0; break; } mutex_unlock(&nsim_bus_dev_list_lock); return !err ? 
count : err; } static BUS_ATTR_WO(del_device); static struct attribute *nsim_bus_attrs[] = { &bus_attr_new_device.attr, &bus_attr_del_device.attr, NULL }; ATTRIBUTE_GROUPS(nsim_bus); static int nsim_bus_probe(struct device *dev) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); return nsim_drv_probe(nsim_bus_dev); } static void nsim_bus_remove(struct device *dev) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); nsim_drv_remove(nsim_bus_dev); } static int nsim_num_vf(struct device *dev) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); return nsim_bus_dev->num_vfs; } static struct bus_type nsim_bus = { .name = DRV_NAME, .dev_name = DRV_NAME, .bus_groups = nsim_bus_groups, .probe = nsim_bus_probe, .remove = nsim_bus_remove, .num_vf = nsim_num_vf, }; #define NSIM_BUS_DEV_MAX_VFS 4 static struct nsim_bus_dev * nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues) { struct nsim_bus_dev *nsim_bus_dev; int err; nsim_bus_dev = kzalloc(sizeof(*nsim_bus_dev), GFP_KERNEL); if (!nsim_bus_dev) return ERR_PTR(-ENOMEM); err = ida_alloc_range(&nsim_bus_dev_ids, id, id, GFP_KERNEL); if (err < 0) goto err_nsim_bus_dev_free; nsim_bus_dev->dev.id = err; nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; nsim_bus_dev->num_queues = num_queues; nsim_bus_dev->initial_net = current->nsproxy->net_ns; nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; /* Disallow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, false); err = device_register(&nsim_bus_dev->dev); if (err) goto err_nsim_bus_dev_id_free; return nsim_bus_dev; err_nsim_bus_dev_id_free: ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); put_device(&nsim_bus_dev->dev); nsim_bus_dev = NULL; err_nsim_bus_dev_free: kfree(nsim_bus_dev); return ERR_PTR(err); } static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev) { /* Disallow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, false); 
ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); device_unregister(&nsim_bus_dev->dev); } static struct device_driver nsim_driver = { .name = DRV_NAME, .bus = &nsim_bus, .owner = THIS_MODULE, }; int nsim_bus_init(void) { int err; err = bus_register(&nsim_bus); if (err) return err; err = driver_register(&nsim_driver); if (err) goto err_bus_unregister; refcount_set(&nsim_bus_devs, 1); /* Allow using resources */ smp_store_release(&nsim_bus_enable, true); return 0; err_bus_unregister: bus_unregister(&nsim_bus); return err; } void nsim_bus_exit(void) { struct nsim_bus_dev *nsim_bus_dev, *tmp; /* Disallow using resources */ smp_store_release(&nsim_bus_enable, false); if (refcount_dec_and_test(&nsim_bus_devs)) complete(&nsim_bus_devs_released); mutex_lock(&nsim_bus_dev_list_lock); list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { list_del(&nsim_bus_dev->list); nsim_bus_dev_del(nsim_bus_dev); } mutex_unlock(&nsim_bus_dev_list_lock); wait_for_completion(&nsim_bus_devs_released); driver_unregister(&nsim_driver); bus_unregister(&nsim_bus); } |
31 446 447 456 454 455 454 447 415 445 447 477 476 413 411 414 412 412 7 403 409 462 7 461 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 | // SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/sysfs.c * * (C) Copyright 2002 David Brownell * (C) Copyright 2002,2004 Greg Kroah-Hartman * (C) Copyright 2002,2004 IBM Corp. * * All of the sysfs file attributes for usb devices and interfaces. * * Released under the GPLv2 only. 
*/ #include <linux/kernel.h> #include <linux/kstrtox.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/usb/quirks.h> #include <linux/of.h> #include "usb.h" /* Active configuration fields */ #define usb_actconfig_show(field, format_string) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ struct usb_host_config *actconfig; \ ssize_t rc; \ \ udev = to_usb_device(dev); \ rc = usb_lock_device_interruptible(udev); \ if (rc < 0) \ return -EINTR; \ actconfig = udev->actconfig; \ if (actconfig) \ rc = sysfs_emit(buf, format_string, \ actconfig->desc.field); \ usb_unlock_device(udev); \ return rc; \ } \ #define usb_actconfig_attr(field, format_string) \ usb_actconfig_show(field, format_string) \ static DEVICE_ATTR_RO(field) usb_actconfig_attr(bNumInterfaces, "%2d\n"); usb_actconfig_attr(bmAttributes, "%2x\n"); static ssize_t bMaxPower_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig) rc = sysfs_emit(buf, "%dmA\n", usb_get_max_power(udev, actconfig)); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(bMaxPower); static ssize_t configuration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig && actconfig->string) rc = sysfs_emit(buf, "%s\n", actconfig->string); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(configuration); /* configuration value is always present, and r/w */ usb_actconfig_show(bConfigurationValue, "%u\n"); static ssize_t bConfigurationValue_store(struct device *dev, 
struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int config, value, rc; if (sscanf(buf, "%d", &config) != 1 || config < -1 || config > 255) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; value = usb_set_configuration(udev, config); usb_unlock_device(udev); return (value < 0) ? value : count; } static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); #ifdef CONFIG_OF static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_node *of_node = dev->of_node; return sysfs_emit(buf, "%pOF\n", of_node); } static DEVICE_ATTR_RO(devspec); #endif /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ int retval; \ \ udev = to_usb_device(dev); \ retval = usb_lock_device_interruptible(udev); \ if (retval < 0) \ return -EINTR; \ retval = sysfs_emit(buf, "%s\n", udev->name); \ usb_unlock_device(udev); \ return retval; \ } \ static DEVICE_ATTR_RO(name) usb_string_attr(product); usb_string_attr(manufacturer); usb_string_attr(serial); static ssize_t speed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; char *speed; udev = to_usb_device(dev); switch (udev->speed) { case USB_SPEED_LOW: speed = "1.5"; break; case USB_SPEED_UNKNOWN: case USB_SPEED_FULL: speed = "12"; break; case USB_SPEED_HIGH: speed = "480"; break; case USB_SPEED_SUPER: speed = "5000"; break; case USB_SPEED_SUPER_PLUS: if (udev->ssp_rate == USB_SSP_GEN_2x2) speed = "20000"; else speed = "10000"; break; default: speed = "unknown"; } return sysfs_emit(buf, "%s\n", speed); } static DEVICE_ATTR_RO(speed); static ssize_t rx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = 
to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->rx_lanes); } static DEVICE_ATTR_RO(rx_lanes); static ssize_t tx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->tx_lanes); } static DEVICE_ATTR_RO(tx_lanes); static ssize_t busnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->bus->busnum); } static DEVICE_ATTR_RO(busnum); static ssize_t devnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->devnum); } static DEVICE_ATTR_RO(devnum); static ssize_t devpath_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%s\n", udev->devpath); } static DEVICE_ATTR_RO(devpath); static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; u16 bcdUSB; udev = to_usb_device(dev); bcdUSB = le16_to_cpu(udev->descriptor.bcdUSB); return sysfs_emit(buf, "%2x.%02x\n", bcdUSB >> 8, bcdUSB & 0xff); } static DEVICE_ATTR_RO(version); static ssize_t maxchild_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->maxchild); } static DEVICE_ATTR_RO(maxchild); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "0x%x\n", udev->quirks); } static DEVICE_ATTR_RO(quirks); static ssize_t avoid_reset_quirk_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", !!(udev->quirks & USB_QUIRK_RESET)); } static ssize_t 
avoid_reset_quirk_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int val, rc; if (sscanf(buf, "%d", &val) != 1 || val < 0 || val > 1) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (val) udev->quirks |= USB_QUIRK_RESET; else udev->quirks &= ~USB_QUIRK_RESET; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(avoid_reset_quirk); static ssize_t urbnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", atomic_read(&udev->urbnum)); } static DEVICE_ATTR_RO(urbnum); static ssize_t ltm_capable_show(struct device *dev, struct device_attribute *attr, char *buf) { if (usb_device_supports_ltm(to_usb_device(dev))) return sysfs_emit(buf, "%s\n", "yes"); return sysfs_emit(buf, "%s\n", "no"); } static DEVICE_ATTR_RO(ltm_capable); #ifdef CONFIG_PM static ssize_t persist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->persist_enabled); } static ssize_t persist_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int value, rc; /* Hubs are always enabled for USB_PERSIST */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) return -EPERM; if (sscanf(buf, "%d", &value) != 1) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; udev->persist_enabled = !!value; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(persist); static int add_persist_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); /* Hubs are automatically enabled for USB_PERSIST, * no point in creating the attribute file. 
*/ if (udev->descriptor.bDeviceClass != USB_CLASS_HUB) rc = sysfs_add_file_to_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } return rc; } static void remove_persist_attributes(struct device *dev) { sysfs_remove_file_from_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } static ssize_t connected_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%u\n", jiffies_to_msecs(jiffies - udev->connect_time)); } static DEVICE_ATTR_RO(connected_duration); /* * If the device is resumed, the last time the device was suspended has * been pre-subtracted from active_duration. We add the current time to * get the duration that the device was actually active. * * If the device is suspended, the active_duration is up-to-date. */ static ssize_t active_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); int duration; if (udev->state != USB_STATE_SUSPENDED) duration = jiffies_to_msecs(jiffies + udev->active_duration); else duration = jiffies_to_msecs(udev->active_duration); return sysfs_emit(buf, "%u\n", duration); } static DEVICE_ATTR_RO(active_duration); static ssize_t autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay / 1000); } static ssize_t autosuspend_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int value; if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 || value <= -INT_MAX/1000) return -EINVAL; pm_runtime_set_autosuspend_delay(dev, value * 1000); return count; } static DEVICE_ATTR_RW(autosuspend); static const char on_string[] = "on"; static const char auto_string[] = "auto"; static void warn_level(void) { static int level_warned; if (!level_warned) { level_warned = 1; printk(KERN_WARNING "WARNING! 
power/level is deprecated; " "use power/control instead\n"); } } static ssize_t level_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p = auto_string; warn_level(); if (udev->state != USB_STATE_SUSPENDED && !udev->dev.power.runtime_auto) p = on_string; return sysfs_emit(buf, "%s\n", p); } static ssize_t level_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int len = count; char *cp; int rc = count; int rv; warn_level(); cp = memchr(buf, '\n', count); if (cp) len = cp - buf; rv = usb_lock_device_interruptible(udev); if (rv < 0) return -EINTR; if (len == sizeof on_string - 1 && strncmp(buf, on_string, len) == 0) usb_disable_autosuspend(udev); else if (len == sizeof auto_string - 1 && strncmp(buf, auto_string, len) == 0) usb_enable_autosuspend(udev); else rc = -EINVAL; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RW(level); static ssize_t usb2_hardware_lpm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; if (udev->usb2_hw_lpm_allowed == 1) p = "enabled"; else p = "disabled"; return sysfs_emit(buf, "%s\n", p); } static ssize_t usb2_hardware_lpm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool value; int ret; ret = usb_lock_device_interruptible(udev); if (ret < 0) return -EINTR; ret = kstrtobool(buf, &value); if (!ret) { udev->usb2_hw_lpm_allowed = value; if (value) ret = usb_enable_usb2_hardware_lpm(udev); else ret = usb_disable_usb2_hardware_lpm(udev); } usb_unlock_device(udev); if (!ret) return count; return ret; } static DEVICE_ATTR_RW(usb2_hardware_lpm); static ssize_t usb2_lpm_l1_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return 
sysfs_emit(buf, "%d\n", udev->l1_params.timeout); } static ssize_t usb2_lpm_l1_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u16 timeout; if (kstrtou16(buf, 0, &timeout)) return -EINVAL; udev->l1_params.timeout = timeout; return count; } static DEVICE_ATTR_RW(usb2_lpm_l1_timeout); static ssize_t usb2_lpm_besl_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->l1_params.besl); } static ssize_t usb2_lpm_besl_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u8 besl; if (kstrtou8(buf, 0, &besl) || besl > 15) return -EINVAL; udev->l1_params.besl = besl; return count; } static DEVICE_ATTR_RW(usb2_lpm_besl); static ssize_t usb3_hardware_lpm_u1_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u1_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sysfs_emit(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u1); static ssize_t usb3_hardware_lpm_u2_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u2_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sysfs_emit(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u2); static struct attribute *usb2_hardware_lpm_attr[] = { &dev_attr_usb2_hardware_lpm.attr, &dev_attr_usb2_lpm_l1_timeout.attr, &dev_attr_usb2_lpm_besl.attr, NULL, }; static const struct attribute_group usb2_hardware_lpm_attr_group = { .name = power_group_name, .attrs = 
usb2_hardware_lpm_attr, }; static struct attribute *usb3_hardware_lpm_attr[] = { &dev_attr_usb3_hardware_lpm_u1.attr, &dev_attr_usb3_hardware_lpm_u2.attr, NULL, }; static const struct attribute_group usb3_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb3_hardware_lpm_attr, }; static struct attribute *power_attrs[] = { &dev_attr_autosuspend.attr, &dev_attr_level.attr, &dev_attr_connected_duration.attr, &dev_attr_active_duration.attr, NULL, }; static const struct attribute_group power_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static int add_power_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); rc = sysfs_merge_group(&dev->kobj, &power_attr_group); if (udev->usb2_hw_lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); if ((udev->speed == USB_SPEED_SUPER || udev->speed == USB_SPEED_SUPER_PLUS) && udev->lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb3_hardware_lpm_attr_group); } return rc; } static void remove_power_attributes(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &power_attr_group); } #else #define add_persist_attributes(dev) 0 #define remove_persist_attributes(dev) do {} while (0) #define add_power_attributes(dev) 0 #define remove_power_attributes(dev) do {} while (0) #endif /* CONFIG_PM */ /* Descriptor fields */ #define usb_descriptor_attr_le16(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sysfs_emit(buf, format_string, \ le16_to_cpu(udev->descriptor.field)); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr_le16(idVendor, "%04x\n"); usb_descriptor_attr_le16(idProduct, "%04x\n"); usb_descriptor_attr_le16(bcdDevice, "%04x\n"); #define usb_descriptor_attr(field, format_string) \ static ssize_t 
\ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sysfs_emit(buf, format_string, udev->descriptor.field); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr(bDeviceClass, "%02x\n"); usb_descriptor_attr(bDeviceSubClass, "%02x\n"); usb_descriptor_attr(bDeviceProtocol, "%02x\n"); usb_descriptor_attr(bNumConfigurations, "%d\n"); usb_descriptor_attr(bMaxPacketSize0, "%d\n"); /* show if the device is authorized (1) or not (0) */ static ssize_t authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); return sysfs_emit(buf, "%u\n", usb_dev->authorized); } /* * Authorize a device to be used in the system * * Writing a 0 deauthorizes the device, writing a 1 authorizes it. */ static ssize_t authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; struct usb_device *usb_dev = to_usb_device(dev); unsigned val; result = sscanf(buf, "%u\n", &val); if (result != 1) result = -EINVAL; else if (val == 0) result = usb_deauthorize_device(usb_dev); else result = usb_authorize_device(usb_dev); return result < 0 ? 
result : size; } static DEVICE_ATTR_IGNORE_LOCKDEP(authorized, S_IRUGO | S_IWUSR, authorized_show, authorized_store); /* "Safely remove a device" */ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int rc = 0; usb_lock_device(udev); if (udev->state != USB_STATE_NOTATTACHED) { /* To avoid races, first unconfigure and then remove */ usb_set_configuration(udev, -1); rc = usb_remove_device(udev); } if (rc == 0) rc = count; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_IGNORE_LOCKDEP(remove, S_IWUSR, NULL, remove_store); static struct attribute *dev_attrs[] = { /* current configuration's attributes */ &dev_attr_configuration.attr, &dev_attr_bNumInterfaces.attr, &dev_attr_bConfigurationValue.attr, &dev_attr_bmAttributes.attr, &dev_attr_bMaxPower.attr, /* device attributes */ &dev_attr_urbnum.attr, &dev_attr_idVendor.attr, &dev_attr_idProduct.attr, &dev_attr_bcdDevice.attr, &dev_attr_bDeviceClass.attr, &dev_attr_bDeviceSubClass.attr, &dev_attr_bDeviceProtocol.attr, &dev_attr_bNumConfigurations.attr, &dev_attr_bMaxPacketSize0.attr, &dev_attr_speed.attr, &dev_attr_rx_lanes.attr, &dev_attr_tx_lanes.attr, &dev_attr_busnum.attr, &dev_attr_devnum.attr, &dev_attr_devpath.attr, &dev_attr_version.attr, &dev_attr_maxchild.attr, &dev_attr_quirks.attr, &dev_attr_avoid_reset_quirk.attr, &dev_attr_authorized.attr, &dev_attr_remove.attr, &dev_attr_ltm_capable.attr, #ifdef CONFIG_OF &dev_attr_devspec.attr, #endif NULL, }; static const struct attribute_group dev_attr_grp = { .attrs = dev_attrs, }; /* When modifying this list, be sure to modify dev_string_attrs_are_visible() * accordingly. 
*/ static struct attribute *dev_string_attrs[] = { &dev_attr_manufacturer.attr, &dev_attr_product.attr, &dev_attr_serial.attr, NULL }; static umode_t dev_string_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); if (a == &dev_attr_manufacturer.attr) { if (udev->manufacturer == NULL) return 0; } else if (a == &dev_attr_product.attr) { if (udev->product == NULL) return 0; } else if (a == &dev_attr_serial.attr) { if (udev->serial == NULL) return 0; } return a->mode; } static const struct attribute_group dev_string_attr_grp = { .attrs = dev_string_attrs, .is_visible = dev_string_attrs_are_visible, }; const struct attribute_group *usb_device_groups[] = { &dev_attr_grp, &dev_string_attr_grp, NULL }; /* Binary descriptors */ static ssize_t read_descriptors(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); size_t nleft = count; size_t srclen, n; int cfgno; void *src; /* The binary attribute begins with the device descriptor. * Following that are the raw descriptor entries for all the * configurations (config plus subsidiary descriptors). */ for (cfgno = -1; cfgno < udev->descriptor.bNumConfigurations && nleft > 0; ++cfgno) { if (cfgno < 0) { src = &udev->descriptor; srclen = sizeof(struct usb_device_descriptor); } else { src = udev->rawdescriptors[cfgno]; srclen = __le16_to_cpu(udev->config[cfgno].desc. 
wTotalLength); } if (off < srclen) { n = min(nleft, srclen - (size_t) off); memcpy(buf, src + off, n); nleft -= n; buf += n; off = 0; } else { off -= srclen; } } return count - nleft; } static struct bin_attribute dev_bin_attr_descriptors = { .attr = {.name = "descriptors", .mode = 0444}, .read = read_descriptors, .size = 18 + 65535, /* dev descr + max-size raw descriptor */ }; /* * Show & store the current value of authorized_default */ static ssize_t authorized_default_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; struct usb_hcd *hcd; hcd = bus_to_hcd(usb_bus); return sysfs_emit(buf, "%u\n", hcd->dev_policy); } static ssize_t authorized_default_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; unsigned int val; struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; struct usb_hcd *hcd; hcd = bus_to_hcd(usb_bus); result = sscanf(buf, "%u\n", &val); if (result == 1) { hcd->dev_policy = val <= USB_DEVICE_AUTHORIZE_INTERNAL ? 
val : USB_DEVICE_AUTHORIZE_ALL; result = size; } else { result = -EINVAL; } return result; } static DEVICE_ATTR_RW(authorized_default); /* * interface_authorized_default_show - show default authorization status * for USB interfaces * * note: interface_authorized_default is the default value * for initializing the authorized attribute of interfaces */ static ssize_t interface_authorized_default_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); return sysfs_emit(buf, "%u\n", !!HCD_INTF_AUTHORIZED(hcd)); } /* * interface_authorized_default_store - store default authorization status * for USB interfaces * * note: interface_authorized_default is the default value * for initializing the authorized attribute of interfaces */ static ssize_t interface_authorized_default_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *usb_dev = to_usb_device(dev); struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); int rc = count; bool val; if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); else clear_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); return rc; } static DEVICE_ATTR_RW(interface_authorized_default); /* Group all the USB bus attributes */ static struct attribute *usb_bus_attrs[] = { &dev_attr_authorized_default.attr, &dev_attr_interface_authorized_default.attr, NULL, }; static const struct attribute_group usb_bus_attr_group = { .name = NULL, /* we want them in the same directory */ .attrs = usb_bus_attrs, }; static int add_default_authorized_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) rc = sysfs_create_group(&dev->kobj, &usb_bus_attr_group); return rc; } static void remove_default_authorized_attributes(struct device *dev) { if (is_usb_device(dev)) { sysfs_remove_group(&dev->kobj, &usb_bus_attr_group); } } int 
usb_create_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; int retval; retval = device_create_bin_file(dev, &dev_bin_attr_descriptors); if (retval) goto error; retval = add_persist_attributes(dev); if (retval) goto error; retval = add_power_attributes(dev); if (retval) goto error; if (is_root_hub(udev)) { retval = add_default_authorized_attributes(dev); if (retval) goto error; } return retval; error: usb_remove_sysfs_dev_files(udev); return retval; } void usb_remove_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; if (is_root_hub(udev)) remove_default_authorized_attributes(dev); remove_power_attributes(dev); remove_persist_attributes(dev); device_remove_bin_file(dev, &dev_bin_attr_descriptors); } /* Interface Association Descriptor fields */ #define usb_intf_assoc_attr(field, format_string) \ static ssize_t \ iad_##field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ \ return sysfs_emit(buf, format_string, \ intf->intf_assoc->field); \ } \ static DEVICE_ATTR_RO(iad_##field) usb_intf_assoc_attr(bFirstInterface, "%02x\n"); usb_intf_assoc_attr(bInterfaceCount, "%02d\n"); usb_intf_assoc_attr(bFunctionClass, "%02x\n"); usb_intf_assoc_attr(bFunctionSubClass, "%02x\n"); usb_intf_assoc_attr(bFunctionProtocol, "%02x\n"); /* Interface fields */ #define usb_intf_attr(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ \ return sysfs_emit(buf, format_string, \ intf->cur_altsetting->desc.field); \ } \ static DEVICE_ATTR_RO(field) usb_intf_attr(bInterfaceNumber, "%02x\n"); usb_intf_attr(bAlternateSetting, "%2d\n"); usb_intf_attr(bNumEndpoints, "%02x\n"); usb_intf_attr(bInterfaceClass, "%02x\n"); usb_intf_attr(bInterfaceSubClass, "%02x\n"); usb_intf_attr(bInterfaceProtocol, "%02x\n"); static ssize_t 
interface_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; char *string; intf = to_usb_interface(dev); string = READ_ONCE(intf->cur_altsetting->string); if (!string) return 0; return sysfs_emit(buf, "%s\n", string); } static DEVICE_ATTR_RO(interface); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; struct usb_device *udev; struct usb_host_interface *alt; intf = to_usb_interface(dev); udev = interface_to_usbdev(intf); alt = READ_ONCE(intf->cur_altsetting); return sysfs_emit(buf, "usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02X" "ic%02Xisc%02Xip%02Xin%02X\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), le16_to_cpu(udev->descriptor.bcdDevice), udev->descriptor.bDeviceClass, udev->descriptor.bDeviceSubClass, udev->descriptor.bDeviceProtocol, alt->desc.bInterfaceClass, alt->desc.bInterfaceSubClass, alt->desc.bInterfaceProtocol, alt->desc.bInterfaceNumber); } static DEVICE_ATTR_RO(modalias); static ssize_t supports_autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { int s; s = device_lock_interruptible(dev); if (s < 0) return -EINTR; /* Devices will be autosuspended even when an interface isn't claimed */ s = (!dev->driver || to_usb_driver(dev->driver)->supports_autosuspend); device_unlock(dev); return sysfs_emit(buf, "%u\n", s); } static DEVICE_ATTR_RO(supports_autosuspend); /* * interface_authorized_show - show authorization status of an USB interface * 1 is authorized, 0 is deauthorized */ static ssize_t interface_authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); return sysfs_emit(buf, "%u\n", intf->authorized); } /* * interface_authorized_store - authorize or deauthorize an USB interface */ static ssize_t interface_authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 
{ struct usb_interface *intf = to_usb_interface(dev); bool val; if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) usb_authorize_interface(intf); else usb_deauthorize_interface(intf); return count; } static struct device_attribute dev_attr_interface_authorized = __ATTR(authorized, S_IRUGO | S_IWUSR, interface_authorized_show, interface_authorized_store); static struct attribute *intf_attrs[] = { &dev_attr_bInterfaceNumber.attr, &dev_attr_bAlternateSetting.attr, &dev_attr_bNumEndpoints.attr, &dev_attr_bInterfaceClass.attr, &dev_attr_bInterfaceSubClass.attr, &dev_attr_bInterfaceProtocol.attr, &dev_attr_modalias.attr, &dev_attr_supports_autosuspend.attr, &dev_attr_interface_authorized.attr, NULL, }; static const struct attribute_group intf_attr_grp = { .attrs = intf_attrs, }; static struct attribute *intf_assoc_attrs[] = { &dev_attr_iad_bFirstInterface.attr, &dev_attr_iad_bInterfaceCount.attr, &dev_attr_iad_bFunctionClass.attr, &dev_attr_iad_bFunctionSubClass.attr, &dev_attr_iad_bFunctionProtocol.attr, NULL, }; static umode_t intf_assoc_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_interface *intf = to_usb_interface(dev); if (intf->intf_assoc == NULL) return 0; return a->mode; } static const struct attribute_group intf_assoc_attr_grp = { .attrs = intf_assoc_attrs, .is_visible = intf_assoc_attrs_are_visible, }; static ssize_t wireless_status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; intf = to_usb_interface(dev); if (intf->wireless_status == USB_WIRELESS_STATUS_DISCONNECTED) return sysfs_emit(buf, "%s\n", "disconnected"); return sysfs_emit(buf, "%s\n", "connected"); } static DEVICE_ATTR_RO(wireless_status); static struct attribute *intf_wireless_status_attrs[] = { &dev_attr_wireless_status.attr, NULL }; static umode_t intf_wireless_status_attr_is_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = 
kobj_to_dev(kobj); struct usb_interface *intf = to_usb_interface(dev); if (a != &dev_attr_wireless_status.attr || intf->wireless_status != USB_WIRELESS_STATUS_NA) return a->mode; return 0; } static const struct attribute_group intf_wireless_status_attr_grp = { .attrs = intf_wireless_status_attrs, .is_visible = intf_wireless_status_attr_is_visible, }; int usb_update_wireless_status_attr(struct usb_interface *intf) { struct device *dev = &intf->dev; int ret; ret = sysfs_update_group(&dev->kobj, &intf_wireless_status_attr_grp); if (ret < 0) return ret; sysfs_notify(&dev->kobj, NULL, "wireless_status"); kobject_uevent(&dev->kobj, KOBJ_CHANGE); return 0; } const struct attribute_group *usb_interface_groups[] = { &intf_attr_grp, &intf_assoc_attr_grp, &intf_wireless_status_attr_grp, NULL }; void usb_create_sysfs_intf_files(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_interface *alt = intf->cur_altsetting; if (intf->sysfs_files_created || intf->unregistering) return; if (!alt->string && !(udev->quirks & USB_QUIRK_CONFIG_INTF_STRINGS)) alt->string = usb_cache_string(udev, alt->desc.iInterface); if (alt->string && device_create_file(&intf->dev, &dev_attr_interface)) { /* This is not a serious error */ dev_dbg(&intf->dev, "interface string descriptor file not created\n"); } intf->sysfs_files_created = 1; } void usb_remove_sysfs_intf_files(struct usb_interface *intf) { if (!intf->sysfs_files_created) return; device_remove_file(&intf->dev, &dev_attr_interface); intf->sysfs_files_created = 0; } |
1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2014 Michael Krufky (mkrufky@linuxtv.org) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include <linux/vmalloc.h> #include <linux/i2c.h> #include <media/tuner.h> #include "mxl111sf.h" #include "mxl111sf-reg.h" #include "mxl111sf-phy.h" #include "mxl111sf-i2c.h" #include "mxl111sf-gpio.h" #include "mxl111sf-demod.h" #include "mxl111sf-tuner.h" #include "lgdt3305.h" #include "lg2160.h" int dvb_usb_mxl111sf_debug; module_param_named(debug, dvb_usb_mxl111sf_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=info, 2=xfer, 4=i2c, 8=reg, 16=adv (or-able))."); static int dvb_usb_mxl111sf_isoc; module_param_named(isoc, dvb_usb_mxl111sf_isoc, int, 0644); MODULE_PARM_DESC(isoc, "enable usb isoc xfer (0=bulk, 1=isoc)."); static int dvb_usb_mxl111sf_spi; module_param_named(spi, dvb_usb_mxl111sf_spi, int, 0644); MODULE_PARM_DESC(spi, "use spi rather than tp for data xfer (0=tp, 1=spi)."); #define ANT_PATH_AUTO 0 #define ANT_PATH_EXTERNAL 1 #define ANT_PATH_INTERNAL 2 static int dvb_usb_mxl111sf_rfswitch = #if 0 ANT_PATH_AUTO; #else ANT_PATH_EXTERNAL; #endif module_param_named(rfswitch, dvb_usb_mxl111sf_rfswitch, int, 0644); MODULE_PARM_DESC(rfswitch, "force rf switch position (0=auto, 1=ext, 2=int)."); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); int mxl111sf_ctrl_msg(struct mxl111sf_state *state, u8 cmd, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { struct dvb_usb_device *d = state->d; int wo = (rbuf == NULL || rlen == 0); /* write-only */ int ret; if (1 + wlen > MXL_MAX_XFER_SIZE) { pr_warn("%s: len=%d is too big!\n", __func__, wlen); return -EOPNOTSUPP; } pr_debug("%s(wlen = %d, rlen = %d)\n", __func__, wlen, rlen); 
mutex_lock(&state->msg_lock); memset(state->sndbuf, 0, 1+wlen); memset(state->rcvbuf, 0, rlen); state->sndbuf[0] = cmd; memcpy(&state->sndbuf[1], wbuf, wlen); ret = (wo) ? dvb_usbv2_generic_write(d, state->sndbuf, 1+wlen) : dvb_usbv2_generic_rw(d, state->sndbuf, 1+wlen, state->rcvbuf, rlen); if (rbuf) memcpy(rbuf, state->rcvbuf, rlen); mutex_unlock(&state->msg_lock); mxl_fail(ret); return ret; }

/* ------------------------------------------------------------------------ */

/* Command bytes passed to mxl111sf_ctrl_msg() for register access. */
#define MXL_CMD_REG_READ 0xaa
#define MXL_CMD_REG_WRITE 0x55

/*
 * mxl111sf_read_reg - read one 8-bit register
 * @state: driver state, handed through to mxl111sf_ctrl_msg()
 * @addr:  register address to read
 * @data:  receives the register value when the reply is valid
 *
 * Sends MXL_CMD_REG_READ with the one-byte address and requests a two-byte
 * reply. The first reply byte must equal @addr; the second is the value.
 *
 * Returns the mxl111sf_ctrl_msg() result, or -EINVAL when the reply names a
 * different address. *@data is written only when the reply matches.
 */
int mxl111sf_read_reg(struct mxl111sf_state *state, u8 addr, u8 *data)
{
	u8 buf[2];
	int ret;

	ret = mxl111sf_ctrl_msg(state, MXL_CMD_REG_READ, &addr, 1, buf, 2);
	if (mxl_fail(ret)) {
		mxl_debug("error reading reg: 0x%02x", addr);
		goto fail;
	}

	if (buf[0] == addr)
		*data = buf[1];
	else {
		pr_err("invalid response reading reg: 0x%02x != 0x%02x, 0x%02x",
		    addr, buf[0], buf[1]);
		ret = -EINVAL;
	}

	/* logged for mismatched replies too, not only for valid ones */
	pr_debug("R: (0x%02x, 0x%02x)\n", addr, buf[1]);
fail:
	return ret;
}

/*
 * mxl111sf_write_reg - write one 8-bit register
 * @state: driver state, handed through to mxl111sf_ctrl_msg()
 * @addr:  register address to write
 * @data:  value to store
 *
 * Sends MXL_CMD_REG_WRITE with the {addr, data} pair as a write-only
 * message (no receive buffer). Returns the mxl111sf_ctrl_msg() result.
 */
int mxl111sf_write_reg(struct mxl111sf_state *state, u8 addr, u8 data)
{
	u8 buf[] = { addr, data };
	int ret;

	pr_debug("W: (0x%02x, 0x%02x)\n", addr, data);

	ret = mxl111sf_ctrl_msg(state, MXL_CMD_REG_WRITE, buf, 2, NULL, 0);
	if (mxl_fail(ret))
		pr_err("error writing reg: 0x%02x, val: 0x%02x", addr, data);
	return ret;
}

/* ------------------------------------------------------------------------ */

/*
 * mxl111sf_write_reg_mask - read-modify-write the bits selected by @mask
 * @state: driver state
 * @addr:  register address
 * @mask:  bits to clear before @data is OR-ed in; 0xff skips the read and
 *         starts from 0
 * @data:  bits to set
 *
 * Returns the result of the final write, or of the register read when that
 * read fails.
 *
 * NOTE(review): @data is OR-ed in as-is, it is not masked with @mask, so
 * bits of @data outside @mask end up in the register as well - confirm
 * that all callers pass @data within @mask.
 */
int mxl111sf_write_reg_mask(struct mxl111sf_state *state,
				   u8 addr, u8 mask, u8 data)
{
	int ret;
	u8 val = 0;

	if (mask != 0xff) {
		ret = mxl111sf_read_reg(state, addr, &val);
#if 1
		/* don't know why this usually errors out on the first try */
		/*
		 * The first failure is only logged (the message says
		 * "writing" although it reports a failed read). The second
		 * read below is NOT part of the if: it is issued every
		 * time, and only its result decides whether we give up.
		 */
		if (mxl_fail(ret))
			pr_err("error writing addr: 0x%02x, mask: 0x%02x, data: 0x%02x, retrying...", addr, mask, data);

		ret = mxl111sf_read_reg(state, addr, &val);
#endif
		if (mxl_fail(ret))
			goto fail;
	}
	val &= ~mask;
	val |= data;

	ret = mxl111sf_write_reg(state, addr, val);
	mxl_fail(ret);
fail:
	return ret;
}

/* 
------------------------------------------------------------------------ */ int mxl111sf_ctrl_program_regs(struct mxl111sf_state *state, struct mxl111sf_reg_ctrl_info *ctrl_reg_info) { int i, ret = 0; for (i = 0; ctrl_reg_info[i].addr | ctrl_reg_info[i].mask | ctrl_reg_info[i].data; i++) { ret = mxl111sf_write_reg_mask(state, ctrl_reg_info[i].addr, ctrl_reg_info[i].mask, ctrl_reg_info[i].data); if (mxl_fail(ret)) { pr_err("failed on reg #%d (0x%02x)", i, ctrl_reg_info[i].addr); break; } } return ret; } /* ------------------------------------------------------------------------ */ static int mxl1x1sf_get_chip_info(struct mxl111sf_state *state) { int ret; u8 id, ver; char *mxl_chip, *mxl_rev; if ((state->chip_id) && (state->chip_ver)) return 0; ret = mxl111sf_read_reg(state, CHIP_ID_REG, &id); if (mxl_fail(ret)) goto fail; state->chip_id = id; ret = mxl111sf_read_reg(state, TOP_CHIP_REV_ID_REG, &ver); if (mxl_fail(ret)) goto fail; state->chip_ver = ver; switch (id) { case 0x61: mxl_chip = "MxL101SF"; break; case 0x63: mxl_chip = "MxL111SF"; break; default: mxl_chip = "UNKNOWN MxL1X1"; break; } switch (ver) { case 0x36: state->chip_rev = MXL111SF_V6; mxl_rev = "v6"; break; case 0x08: state->chip_rev = MXL111SF_V8_100; mxl_rev = "v8_100"; break; case 0x18: state->chip_rev = MXL111SF_V8_200; mxl_rev = "v8_200"; break; default: state->chip_rev = 0; mxl_rev = "UNKNOWN REVISION"; break; } pr_info("%s detected, %s (0x%x)", mxl_chip, mxl_rev, ver); fail: return ret; } #define get_chip_info(state) \ ({ \ int ___ret; \ ___ret = mxl1x1sf_get_chip_info(state); \ if (mxl_fail(___ret)) { \ mxl_debug("failed to get chip info" \ " on first probe attempt"); \ ___ret = mxl1x1sf_get_chip_info(state); \ if (mxl_fail(___ret)) \ pr_err("failed to get chip info during probe"); \ else \ mxl_debug("probe needed a retry " \ "in order to succeed."); \ } \ ___ret; \ }) /* ------------------------------------------------------------------------ */ #if 0 static int mxl111sf_power_ctrl(struct 
dvb_usb_device *d, int onoff) { /* power control depends on which adapter is being woken: * save this for init, instead, via mxl111sf_adap_fe_init */ return 0; } #endif static int mxl111sf_adap_fe_init(struct dvb_frontend *fe) { struct dvb_usb_device *d = fe_to_d(fe); struct mxl111sf_state *state = fe_to_priv(fe); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe->id]; int err; /* exit if we didn't initialize the driver yet */ if (!state->chip_id) { mxl_debug("driver not yet initialized, exit."); goto fail; } pr_debug("%s()\n", __func__); mutex_lock(&state->fe_lock); state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); err = mxl1x1sf_soft_reset(state); mxl_fail(err); err = mxl111sf_init_tuner_demod(state); mxl_fail(err); err = mxl1x1sf_set_device_mode(state, adap_state->device_mode); mxl_fail(err); err = mxl111sf_enable_usb_output(state); mxl_fail(err); err = mxl1x1sf_top_master_ctrl(state, 1); mxl_fail(err); if ((MXL111SF_GPIO_MOD_DVBT != adap_state->gpio_mode) && (state->chip_rev > MXL111SF_V6)) { mxl111sf_config_pin_mux_modes(state, PIN_MUX_TS_SPI_IN_MODE_1); mxl_fail(err); } err = mxl111sf_init_port_expander(state); if (!mxl_fail(err)) { state->gpio_mode = adap_state->gpio_mode; err = mxl111sf_gpio_mode_switch(state, state->gpio_mode); mxl_fail(err); #if 0 err = fe->ops.init(fe); #endif msleep(100); /* add short delay after enabling * the demod before touching it */ } return (adap_state->fe_init) ? adap_state->fe_init(fe) : 0; fail: return -ENODEV; } static int mxl111sf_adap_fe_sleep(struct dvb_frontend *fe) { struct mxl111sf_state *state = fe_to_priv(fe); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe->id]; int err; /* exit if we didn't initialize the driver yet */ if (!state->chip_id) { mxl_debug("driver not yet initialized, exit."); goto fail; } pr_debug("%s()\n", __func__); err = (adap_state->fe_sleep) ? 
adap_state->fe_sleep(fe) : 0; mutex_unlock(&state->fe_lock); return err; fail: return -ENODEV; } static int mxl111sf_ep6_streaming_ctrl(struct dvb_frontend *fe, int onoff) { struct mxl111sf_state *state = fe_to_priv(fe); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe->id]; int ret = 0; pr_debug("%s(%d)\n", __func__, onoff); if (onoff) { ret = mxl111sf_enable_usb_output(state); mxl_fail(ret); ret = mxl111sf_config_mpeg_in(state, 1, 1, adap_state->ep6_clockphase, 0, 0); mxl_fail(ret); #if 0 } else { ret = mxl111sf_disable_656_port(state); mxl_fail(ret); #endif } return ret; } static int mxl111sf_ep5_streaming_ctrl(struct dvb_frontend *fe, int onoff) { struct mxl111sf_state *state = fe_to_priv(fe); int ret = 0; pr_debug("%s(%d)\n", __func__, onoff); if (onoff) { ret = mxl111sf_enable_usb_output(state); mxl_fail(ret); ret = mxl111sf_init_i2s_port(state, 200); mxl_fail(ret); ret = mxl111sf_config_i2s(state, 0, 15); mxl_fail(ret); } else { ret = mxl111sf_disable_i2s_port(state); mxl_fail(ret); } if (state->chip_rev > MXL111SF_V6) ret = mxl111sf_config_spi(state, onoff); mxl_fail(ret); return ret; } static int mxl111sf_ep4_streaming_ctrl(struct dvb_frontend *fe, int onoff) { struct mxl111sf_state *state = fe_to_priv(fe); int ret = 0; pr_debug("%s(%d)\n", __func__, onoff); if (onoff) { ret = mxl111sf_enable_usb_output(state); mxl_fail(ret); } return ret; } /* ------------------------------------------------------------------------ */ static struct lgdt3305_config hauppauge_lgdt3305_config = { .i2c_addr = 0xb2 >> 1, .mpeg_mode = LGDT3305_MPEG_SERIAL, .tpclk_edge = LGDT3305_TPCLK_RISING_EDGE, .tpvalid_polarity = LGDT3305_TP_VALID_HIGH, .deny_i2c_rptr = 1, .spectral_inversion = 0, .qam_if_khz = 6000, .vsb_if_khz = 6000, }; static int mxl111sf_lgdt3305_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = 
&state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_ATSC; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lgdt3305_attach, &hauppauge_lgdt3305_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static struct lg2160_config hauppauge_lg2160_config = { .lg_chip = LG2160, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, }; static int mxl111sf_lg2160_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 
2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_MH; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; ret = get_chip_info(state); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lg2160_attach, &hauppauge_lg2160_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static struct lg2160_config hauppauge_lg2161_1019_config = { .lg_chip = LG2161_1019, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 2, /* LG2161_OIF_SPI_MAS */ }; static struct lg2160_config hauppauge_lg2161_1040_config = { .lg_chip = LG2161_1040, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 4, /* LG2161_OIF_SPI_MAS */ }; static int mxl111sf_lg2161_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the 
dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_MH; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; ret = get_chip_info(state); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lg2160_attach, (MXL111SF_V8_200 == state->chip_rev) ? 
&hauppauge_lg2161_1040_config : &hauppauge_lg2161_1019_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static struct lg2160_config hauppauge_lg2161_1019_ep6_config = { .lg_chip = LG2161_1019, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 1, /* LG2161_OIF_SERIAL_TS */ }; static struct lg2160_config hauppauge_lg2161_1040_ep6_config = { .lg_chip = LG2161_1040, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 7, /* LG2161_OIF_SERIAL_TS */ }; static int mxl111sf_lg2161_ep6_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 
2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_MH; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 0; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; ret = get_chip_info(state); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lg2160_attach, (MXL111SF_V8_200 == state->chip_rev) ? &hauppauge_lg2161_1040_ep6_config : &hauppauge_lg2161_1019_ep6_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static const struct mxl111sf_demod_config mxl_demod_config = { .read_reg = mxl111sf_read_reg, .write_reg = mxl111sf_write_reg, .program_regs = mxl111sf_ctrl_program_regs, }; static int mxl111sf_attach_demod(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 
1 : 2; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_DVBT; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_SOC_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; /* don't care if this fails */ mxl111sf_init_port_expander(state); adap->fe[fe_id] = dvb_attach(mxl111sf_demod_attach, state, &mxl_demod_config); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static inline int mxl111sf_set_ant_path(struct mxl111sf_state *state, int antpath) { return mxl111sf_idac_config(state, 1, 1, (antpath == ANT_PATH_INTERNAL) ? 0x3f : 0x00, 0); } #define DbgAntHunt(x, pwr0, pwr1, pwr2, pwr3) \ pr_err("%s(%d) FINAL input set to %s rxPwr:%d|%d|%d|%d\n", \ __func__, __LINE__, \ (ANT_PATH_EXTERNAL == x) ? "EXTERNAL" : "INTERNAL", \ pwr0, pwr1, pwr2, pwr3) #define ANT_HUNT_SLEEP 90 #define ANT_EXT_TWEAK 0 static int mxl111sf_ant_hunt(struct dvb_frontend *fe) { struct mxl111sf_state *state = fe_to_priv(fe); int antctrl = dvb_usb_mxl111sf_rfswitch; u16 rxPwrA, rxPwr0, rxPwr1, rxPwr2; /* FIXME: must force EXTERNAL for QAM - done elsewhere */ mxl111sf_set_ant_path(state, antctrl == ANT_PATH_AUTO ? 
ANT_PATH_EXTERNAL : antctrl); if (antctrl == ANT_PATH_AUTO) { #if 0 msleep(ANT_HUNT_SLEEP); #endif fe->ops.tuner_ops.get_rf_strength(fe, &rxPwrA); mxl111sf_set_ant_path(state, ANT_PATH_EXTERNAL); msleep(ANT_HUNT_SLEEP); fe->ops.tuner_ops.get_rf_strength(fe, &rxPwr0); mxl111sf_set_ant_path(state, ANT_PATH_EXTERNAL); msleep(ANT_HUNT_SLEEP); fe->ops.tuner_ops.get_rf_strength(fe, &rxPwr1); mxl111sf_set_ant_path(state, ANT_PATH_INTERNAL); msleep(ANT_HUNT_SLEEP); fe->ops.tuner_ops.get_rf_strength(fe, &rxPwr2); if (rxPwr1+ANT_EXT_TWEAK >= rxPwr2) { /* return with EXTERNAL enabled */ mxl111sf_set_ant_path(state, ANT_PATH_EXTERNAL); DbgAntHunt(ANT_PATH_EXTERNAL, rxPwrA, rxPwr0, rxPwr1, rxPwr2); } else { /* return with INTERNAL enabled */ DbgAntHunt(ANT_PATH_INTERNAL, rxPwrA, rxPwr0, rxPwr1, rxPwr2); } } return 0; } static const struct mxl111sf_tuner_config mxl_tuner_config = { .if_freq = MXL_IF_6_0, /* applies to external IF output, only */ .invert_spectrum = 0, .read_reg = mxl111sf_read_reg, .write_reg = mxl111sf_write_reg, .program_regs = mxl111sf_ctrl_program_regs, .top_master_ctrl = mxl1x1sf_top_master_ctrl, .ant_hunt = mxl111sf_ant_hunt, }; static int mxl111sf_attach_tuner(struct dvb_usb_adapter *adap) { struct mxl111sf_state *state = adap_to_priv(adap); #ifdef CONFIG_MEDIA_CONTROLLER_DVB struct media_device *mdev = dvb_get_media_controller(&adap->dvb_adap); int ret; #endif int i; pr_debug("%s()\n", __func__); for (i = 0; i < state->num_frontends; i++) { if (dvb_attach(mxl111sf_tuner_attach, adap->fe[i], state, &mxl_tuner_config) == NULL) return -EIO; adap->fe[i]->ops.read_signal_strength = adap->fe[i]->ops.tuner_ops.get_rf_strength; } #ifdef CONFIG_MEDIA_CONTROLLER_DVB state->tuner.function = MEDIA_ENT_F_TUNER; state->tuner.name = "mxl111sf tuner"; state->tuner_pads[MXL111SF_PAD_RF_INPUT].flags = MEDIA_PAD_FL_SINK; state->tuner_pads[MXL111SF_PAD_RF_INPUT].sig_type = PAD_SIGNAL_ANALOG; state->tuner_pads[MXL111SF_PAD_OUTPUT].flags = MEDIA_PAD_FL_SOURCE; 
state->tuner_pads[MXL111SF_PAD_OUTPUT].sig_type = PAD_SIGNAL_ANALOG; ret = media_entity_pads_init(&state->tuner, MXL111SF_NUM_PADS, state->tuner_pads); if (ret) return ret; ret = media_device_register_entity(mdev, &state->tuner); if (ret) return ret; #endif return 0; } static u32 mxl111sf_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm mxl111sf_i2c_algo = { .master_xfer = mxl111sf_i2c_xfer, .functionality = mxl111sf_i2c_func, #ifdef NEED_ALGO_CONTROL .algo_control = dummy_algo_control, #endif }; static int mxl111sf_init(struct dvb_usb_device *d) { struct mxl111sf_state *state = d_to_priv(d); int ret; static u8 eeprom[256]; u8 reg = 0; struct i2c_msg msg[2] = { { .addr = 0xa0 >> 1, .len = 1, .buf = ® }, { .addr = 0xa0 >> 1, .flags = I2C_M_RD, .len = sizeof(eeprom), .buf = eeprom }, }; ret = get_chip_info(state); if (mxl_fail(ret)) pr_err("failed to get chip info during probe"); mutex_init(&state->fe_lock); if (state->chip_rev > MXL111SF_V6) mxl111sf_config_pin_mux_modes(state, PIN_MUX_TS_SPI_IN_MODE_1); ret = i2c_transfer(&d->i2c_adap, msg, 2); if (mxl_fail(ret)) return 0; tveeprom_hauppauge_analog(&state->tv, (0x84 == eeprom[0xa0]) ? 
eeprom + 0xa0 : eeprom + 0x80); #if 0 switch (state->tv.model) { case 117001: case 126001: case 138001: break; default: printk(KERN_WARNING "%s: warning: unknown hauppauge model #%d\n", __func__, state->tv.model); } #endif return 0; } static int mxl111sf_frontend_attach_dvbt(struct dvb_usb_adapter *adap) { return mxl111sf_attach_demod(adap, 0); } static int mxl111sf_frontend_attach_atsc(struct dvb_usb_adapter *adap) { return mxl111sf_lgdt3305_frontend_attach(adap, 0); } static int mxl111sf_frontend_attach_mh(struct dvb_usb_adapter *adap) { return mxl111sf_lg2160_frontend_attach(adap, 0); } static int mxl111sf_frontend_attach_atsc_mh(struct dvb_usb_adapter *adap) { int ret; pr_debug("%s\n", __func__); ret = mxl111sf_lgdt3305_frontend_attach(adap, 0); if (ret < 0) return ret; ret = mxl111sf_attach_demod(adap, 1); if (ret < 0) return ret; ret = mxl111sf_lg2160_frontend_attach(adap, 2); if (ret < 0) return ret; return ret; } static int mxl111sf_frontend_attach_mercury(struct dvb_usb_adapter *adap) { int ret; pr_debug("%s\n", __func__); ret = mxl111sf_lgdt3305_frontend_attach(adap, 0); if (ret < 0) return ret; ret = mxl111sf_attach_demod(adap, 1); if (ret < 0) return ret; ret = mxl111sf_lg2161_ep6_frontend_attach(adap, 2); if (ret < 0) return ret; return ret; } static int mxl111sf_frontend_attach_mercury_mh(struct dvb_usb_adapter *adap) { int ret; pr_debug("%s\n", __func__); ret = mxl111sf_attach_demod(adap, 0); if (ret < 0) return ret; if (dvb_usb_mxl111sf_spi) ret = mxl111sf_lg2161_frontend_attach(adap, 1); else ret = mxl111sf_lg2161_ep6_frontend_attach(adap, 1); return ret; } static void mxl111sf_stream_config_bulk(struct usb_data_stream_properties *stream, u8 endpoint) { pr_debug("%s: endpoint=%d size=8192\n", __func__, endpoint); stream->type = USB_BULK; stream->count = 5; stream->endpoint = endpoint; stream->u.bulk.buffersize = 8192; } static void mxl111sf_stream_config_isoc(struct usb_data_stream_properties *stream, u8 endpoint, int framesperurb, int framesize) { 
pr_debug("%s: endpoint=%d size=%d\n", __func__, endpoint, framesperurb * framesize); stream->type = USB_ISOC; stream->count = 5; stream->endpoint = endpoint; stream->u.isoc.framesperurb = framesperurb; stream->u.isoc.framesize = framesize; stream->u.isoc.interval = 1; } /* DVB USB Driver stuff */ /* dvbt mxl111sf * bulk EP4/BULK/5/8192 * isoc EP4/ISOC/5/96/564 */ static int mxl111sf_get_stream_config_dvbt(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); return 0; } static int mxl111sf_probe(struct dvb_usb_device *dev) { struct mxl111sf_state *state = d_to_priv(dev); mutex_init(&state->msg_lock); return 0; } static struct dvb_usb_device_properties mxl111sf_props_dvbt = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_dvbt, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_ep4_streaming_ctrl, .get_stream_config = mxl111sf_get_stream_config_dvbt, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* atsc lgdt3305 * bulk EP6/BULK/5/8192 * isoc EP6/ISOC/5/24/3072 */ static int mxl111sf_get_stream_config_atsc(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); return 0; } static struct dvb_usb_device_properties mxl111sf_props_atsc = { .driver_name = KBUILD_MODNAME, .owner = 
THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_ep6_streaming_ctrl, .get_stream_config = mxl111sf_get_stream_config_atsc, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* mh lg2160 * bulk EP5/BULK/5/8192/RAW * isoc EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_mh(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); return 0; } static struct dvb_usb_device_properties mxl111sf_props_mh = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mh, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_ep5_streaming_ctrl, .get_stream_config = mxl111sf_get_stream_config_mh, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* atsc mh lgdt3305 mxl111sf lg2160 * bulk EP6/BULK/5/8192 EP4/BULK/5/8192 EP5/BULK/5/8192/RAW * isoc EP6/ISOC/5/24/3072 EP4/ISOC/5/96/564 EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_atsc_mh(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); if (fe->id == 0) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) 
mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } else if (fe->id == 1) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); } else if (fe->id == 2) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); } return 0; } static int mxl111sf_streaming_ctrl_atsc_mh(struct dvb_frontend *fe, int onoff) { pr_debug("%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); if (fe->id == 0) return mxl111sf_ep6_streaming_ctrl(fe, onoff); else if (fe->id == 1) return mxl111sf_ep4_streaming_ctrl(fe, onoff); else if (fe->id == 2) return mxl111sf_ep5_streaming_ctrl(fe, onoff); return 0; } static struct dvb_usb_device_properties mxl111sf_props_atsc_mh = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc_mh, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_streaming_ctrl_atsc_mh, .get_stream_config = mxl111sf_get_stream_config_atsc_mh, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* mercury lgdt3305 mxl111sf lg2161 * tp bulk EP6/BULK/5/8192 EP4/BULK/5/8192 EP6/BULK/5/8192/RAW * tp isoc EP6/ISOC/5/24/3072 EP4/ISOC/5/96/564 EP6/ISOC/5/24/3072/RAW * spi bulk EP6/BULK/5/8192 EP4/BULK/5/8192 EP5/BULK/5/8192/RAW * spi isoc EP6/ISOC/5/24/3072 EP4/ISOC/5/96/564 EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_mercury(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); if (fe->id == 0) { *ts_type = 
DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } else if (fe->id == 1) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); } else if (fe->id == 2 && dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); } else if (fe->id == 2 && !dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } return 0; } static int mxl111sf_streaming_ctrl_mercury(struct dvb_frontend *fe, int onoff) { pr_debug("%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); if (fe->id == 0) return mxl111sf_ep6_streaming_ctrl(fe, onoff); else if (fe->id == 1) return mxl111sf_ep4_streaming_ctrl(fe, onoff); else if (fe->id == 2 && dvb_usb_mxl111sf_spi) return mxl111sf_ep5_streaming_ctrl(fe, onoff); else if (fe->id == 2 && !dvb_usb_mxl111sf_spi) return mxl111sf_ep6_streaming_ctrl(fe, onoff); return 0; } static struct dvb_usb_device_properties mxl111sf_props_mercury = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_streaming_ctrl_mercury, .get_stream_config = mxl111sf_get_stream_config_mercury, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* mercury mh mxl111sf lg2161 * tp bulk EP4/BULK/5/8192 EP6/BULK/5/8192/RAW * tp isoc EP4/ISOC/5/96/564 EP6/ISOC/5/24/3072/RAW 
* spi bulk EP4/BULK/5/8192 EP5/BULK/5/8192/RAW * spi isoc EP4/ISOC/5/96/564 EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_mercury_mh(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); if (fe->id == 0) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); } else if (fe->id == 1 && dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); } else if (fe->id == 1 && !dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } return 0; } static int mxl111sf_streaming_ctrl_mercury_mh(struct dvb_frontend *fe, int onoff) { pr_debug("%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); if (fe->id == 0) return mxl111sf_ep4_streaming_ctrl(fe, onoff); else if (fe->id == 1 && dvb_usb_mxl111sf_spi) return mxl111sf_ep5_streaming_ctrl(fe, onoff); else if (fe->id == 1 && !dvb_usb_mxl111sf_spi) return mxl111sf_ep6_streaming_ctrl(fe, onoff); return 0; } static struct dvb_usb_device_properties mxl111sf_props_mercury_mh = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury_mh, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_streaming_ctrl_mercury_mh, .get_stream_config = mxl111sf_get_stream_config_mercury_mh, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; static const struct usb_device_id mxl111sf_id_table[] = { 
{ DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc600, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc601, &mxl111sf_props_atsc, "Hauppauge 126xxx ATSC", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc602, &mxl111sf_props_mh, "HCW 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc603, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc604, &mxl111sf_props_dvbt, "Hauppauge 126xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc609, &mxl111sf_props_atsc, "Hauppauge 126xxx ATSC", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc60a, &mxl111sf_props_mh, "HCW 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc60b, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc60c, &mxl111sf_props_dvbt, "Hauppauge 126xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc653, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc65b, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb700, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb701, &mxl111sf_props_atsc, "Hauppauge 126xxx ATSC", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb702, &mxl111sf_props_mh, "HCW 117xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb703, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb704, &mxl111sf_props_dvbt, "Hauppauge 117xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb753, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb763, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb764, &mxl111sf_props_dvbt, "Hauppauge 117xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd853, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { 
DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd854, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd863, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd864, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8d3, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8d4, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8e3, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8e4, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8ff, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc612, &mxl111sf_props_mercury_mh, "Hauppauge 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc613, &mxl111sf_props_mercury, "Hauppauge WinTV-Aero-M", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc61a, &mxl111sf_props_mercury_mh, "Hauppauge 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc61b, &mxl111sf_props_mercury, "Hauppauge WinTV-Aero-M", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb757, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb767, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, mxl111sf_id_table); static struct usb_driver mxl111sf_usb_driver = { .name = KBUILD_MODNAME, .id_table = mxl111sf_id_table, .probe = dvb_usbv2_probe, .disconnect = dvb_usbv2_disconnect, .suspend = dvb_usbv2_suspend, .resume = dvb_usbv2_resume, .no_dynamic_id = 1, .soft_unbind = 1, }; module_usb_driver(mxl111sf_usb_driver); MODULE_AUTHOR("Michael Krufky <mkrufky@linuxtv.org>"); MODULE_DESCRIPTION("Driver for MaxLinear MxL111SF"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); |
278 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BLOCKGROUP_LOCK_H #define _LINUX_BLOCKGROUP_LOCK_H /* * Per-blockgroup locking for ext2 and ext3. * * Simple hashed spinlocking. */ #include <linux/spinlock.h> #include <linux/cache.h> #ifdef CONFIG_SMP #define NR_BG_LOCKS (4 << ilog2(NR_CPUS < 32 ? NR_CPUS : 32)) #else #define NR_BG_LOCKS 1 #endif struct bgl_lock { spinlock_t lock; } ____cacheline_aligned_in_smp; struct blockgroup_lock { struct bgl_lock locks[NR_BG_LOCKS]; }; static inline void bgl_lock_init(struct blockgroup_lock *bgl) { int i; for (i = 0; i < NR_BG_LOCKS; i++) spin_lock_init(&bgl->locks[i].lock); } static inline spinlock_t * bgl_lock_ptr(struct blockgroup_lock *bgl, unsigned int block_group) { return &bgl->locks[block_group & (NR_BG_LOCKS-1)].lock; } #endif |
4 4 4 3 2 7 4 10 9 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
*/ #include <rdma/rdma_netlink.h> #include <net/addrconf.h> #include "rxe.h" #include "rxe_loc.h" MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); /* free resources for a rxe device all objects created for this device must * have been destroyed */ void rxe_dealloc(struct ib_device *ib_dev) { struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev); rxe_pool_cleanup(&rxe->uc_pool); rxe_pool_cleanup(&rxe->pd_pool); rxe_pool_cleanup(&rxe->ah_pool); rxe_pool_cleanup(&rxe->srq_pool); rxe_pool_cleanup(&rxe->qp_pool); rxe_pool_cleanup(&rxe->cq_pool); rxe_pool_cleanup(&rxe->mr_pool); rxe_pool_cleanup(&rxe->mw_pool); WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); if (rxe->tfm) crypto_free_shash(rxe->tfm); } /* initialize rxe device parameters */ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; rxe->attr.vendor_id = RXE_VENDOR_ID; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG; rxe->attr.max_send_sge = RXE_MAX_SGE; rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; rxe->attr.max_cq = RXE_MAX_CQ; rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; rxe->attr.max_mr = RXE_MAX_MR; rxe->attr.max_mw = RXE_MAX_MW; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; rxe->attr.atomic_cap = IB_ATOMIC_HCA; rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; rxe->attr.max_ah = RXE_MAX_AH; rxe->attr.max_srq = RXE_MAX_SRQ; rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; 
rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, rxe->ndev->dev_addr); rxe->max_ucontext = RXE_MAX_UCONTEXT; } /* initialize port attributes */ static void rxe_init_port_param(struct rxe_port *port) { port->attr.state = IB_PORT_DOWN; port->attr.max_mtu = IB_MTU_4096; port->attr.active_mtu = IB_MTU_256; port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR; port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR; port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN; port->attr.lid = RXE_PORT_LID; port->attr.sm_lid = RXE_PORT_SM_LID; port->attr.lmc = RXE_PORT_LMC; port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM; port->attr.sm_sl = RXE_PORT_SM_SL; port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT; port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY; port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; port->attr.phys_state = RXE_PORT_PHYS_STATE; port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); } /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ static void rxe_init_ports(struct rxe_dev *rxe) { struct rxe_port *port = &rxe->port; rxe_init_port_param(port); addrconf_addr_eui48((unsigned char *)&port->port_guid, rxe->ndev->dev_addr); spin_lock_init(&port->port_lock); } /* init pools of managed objects */ static void rxe_init_pools(struct rxe_dev *rxe) { rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC); rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD); rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH); rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ); rxe_pool_init(rxe, &rxe->qp_pool, 
RXE_TYPE_QP); rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ); rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR); rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW); } /* initialize rxe device state */ static void rxe_init(struct rxe_dev *rxe) { /* init default device parameters */ rxe_init_device_param(rxe); rxe_init_ports(rxe); rxe_init_pools(rxe); /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); /* init multicast support */ spin_lock_init(&rxe->mcg_lock); rxe->mcg_tree = RB_ROOT; mutex_init(&rxe->usdev_lock); } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) { struct rxe_port *port = &rxe->port; enum ib_mtu mtu; mtu = eth_mtu_int_to_enum(ndev_mtu); /* Make sure that new MTU in range */ mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256; port->attr.active_mtu = mtu; port->mtu_cap = ib_mtu_enum_to_int(mtu); rxe_info_dev(rxe, "Set mtu to %d", port->mtu_cap); } /* called by ifc layer to create new rxe device. * The caller should allocate memory for rxe by calling ib_alloc_device. 
*/ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) { rxe_init(rxe); rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name); } static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) { struct rxe_dev *rxe; int err = 0; if (is_vlan_dev(ndev)) { rxe_err("rxe creation allowed on top of a real device only"); err = -EPERM; goto err; } rxe = rxe_get_dev_from_net(ndev); if (rxe) { ib_device_put(&rxe->ib_dev); rxe_err_dev(rxe, "already configured on %s", ndev->name); err = -EEXIST; goto err; } err = rxe_net_add(ibdev_name, ndev); if (err) { rxe_err("failed to add %s\n", ndev->name); goto err; } err: return err; } static struct rdma_link_ops rxe_link_ops = { .type = "rxe", .newlink = rxe_newlink, }; static int __init rxe_module_init(void) { int err; err = rxe_alloc_wq(); if (err) return err; err = rxe_net_init(); if (err) { rxe_destroy_wq(); return err; } rdma_link_register(&rxe_link_ops); pr_info("loaded\n"); return 0; } static void __exit rxe_module_exit(void) { rdma_link_unregister(&rxe_link_ops); ib_unregister_driver(RDMA_DRIVER_RXE); rxe_net_exit(); rxe_destroy_wq(); pr_info("unloaded\n"); } late_initcall(rxe_module_init); module_exit(rxe_module_exit); MODULE_ALIAS_RDMA_LINK("rxe"); |
92 24 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance * * Original mutex implementation started by Ingo Molnar: * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * * Wait/Die implementation: * Copyright (C) 2013 Canonical Ltd. * Choice of algorithm: * Copyright (C) 2018 WMWare Inc. * * This file contains the main data structure and API definitions. 
*/ #ifndef __LINUX_WW_MUTEX_H #define __LINUX_WW_MUTEX_H #include <linux/mutex.h> #include <linux/rtmutex.h> #if defined(CONFIG_DEBUG_MUTEXES) || \ (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES)) #define DEBUG_WW_MUTEXES #endif #ifndef CONFIG_PREEMPT_RT #define WW_MUTEX_BASE mutex #define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) #define ww_mutex_base_is_locked(b) mutex_is_locked((b)) #else #define WW_MUTEX_BASE rt_mutex #define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k) #define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex) #endif struct ww_class { atomic_long_t stamp; struct lock_class_key acquire_key; struct lock_class_key mutex_key; const char *acquire_name; const char *mutex_name; unsigned int is_wait_die; }; struct ww_mutex { struct WW_MUTEX_BASE base; struct ww_acquire_ctx *ctx; #ifdef DEBUG_WW_MUTEXES struct ww_class *ww_class; #endif }; struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; unsigned int acquired; unsigned short wounded; unsigned short is_wait_die; #ifdef DEBUG_WW_MUTEXES unsigned int done_acquire; struct ww_class *ww_class; void *contending_lock; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; unsigned int deadlock_inject_countdown; #endif }; #define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die) \ { .stamp = ATOMIC_LONG_INIT(0) \ , .acquire_name = #ww_class "_acquire" \ , .mutex_name = #ww_class "_mutex" \ , .is_wait_die = _is_wait_die } #define DEFINE_WD_CLASS(classname) \ struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1) #define DEFINE_WW_CLASS(classname) \ struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0) /** * ww_mutex_init - initialize the w/w mutex * @lock: the mutex to be initialized * @ww_class: the w/w class the mutex should belong to * * Initialize the w/w mutex to unlocked state and associate it with the given * class. 
Static define macro for w/w mutex is not provided and this function * is the only way to properly initialize the w/w mutex. * * It is not allowed to initialize an already locked mutex. */ static inline void ww_mutex_init(struct ww_mutex *lock, struct ww_class *ww_class) { ww_mutex_base_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); lock->ctx = NULL; #ifdef DEBUG_WW_MUTEXES lock->ww_class = ww_class; #endif } /** * ww_acquire_init - initialize a w/w acquire context * @ctx: w/w acquire context to initialize * @ww_class: w/w class of the context * * Initializes an context to acquire multiple mutexes of the given w/w class. * * Context-based w/w mutex acquiring can be done in any order whatsoever within * a given lock class. Deadlocks will be detected and handled with the * wait/die logic. * * Mixing of context-based w/w mutex acquiring and single w/w mutex locking can * result in undetected deadlocks and is so forbidden. Mixing different contexts * for the same w/w class when acquiring mutexes can also result in undetected * deadlocks, and is hence also forbidden. Both types of abuse will be caught by * enabling CONFIG_PROVE_LOCKING. * * Nesting of acquire contexts for _different_ w/w classes is possible, subject * to the usual locking rules between different lock classes. * * An acquire context must be released with ww_acquire_fini by the same task * before the memory is freed. It is recommended to allocate the context itself * on the stack. 
*/ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, struct ww_class *ww_class) { ctx->task = current; ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; ctx->wounded = false; ctx->is_wait_die = ww_class->is_wait_die; #ifdef DEBUG_WW_MUTEXES ctx->ww_class = ww_class; ctx->done_acquire = 0; ctx->contending_lock = NULL; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; ctx->deadlock_inject_countdown = ctx->stamp & 0xf; #endif } /** * ww_acquire_done - marks the end of the acquire phase * @ctx: the acquire context * * Marks the end of the acquire phase, any further w/w mutex lock calls using * this context are forbidden. * * Calling this function is optional, it is just useful to document w/w mutex * code and clearly designated the acquire phase from actually using the locked * data structures. */ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) { #ifdef DEBUG_WW_MUTEXES lockdep_assert_held(ctx); DEBUG_LOCKS_WARN_ON(ctx->done_acquire); ctx->done_acquire = 1; #endif } /** * ww_acquire_fini - releases a w/w acquire context * @ctx: the acquire context to free * * Releases a w/w acquire context. This must be called _after_ all acquired w/w * mutexes have been released with ww_mutex_unlock. 
*/ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) /* * lockdep will normally handle this, * but fail without anyway */ ctx->done_acquire = 1; if (!IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC)) /* ensure ww_acquire_fini will still fail if called twice */ ctx->acquired = ~0U; #endif } /** * ww_mutex_lock - acquire the w/w mutex * @lock: the mutex to be acquired * @ctx: w/w acquire context, or NULL to acquire only a single lock. * * Lock the w/w mutex exclusively for this task. * * Deadlocks within a given w/w class of locks are detected and handled with the * wait/die algorithm. If the lock isn't immediately available this function * will either sleep until it is (wait case). Or it selects the current context * for backing off by returning -EDEADLK (die case). Trying to acquire the * same lock with the same context twice is also detected and signalled by * returning -EALREADY. Returns 0 if the mutex was successfully acquired. * * In the die case the caller must release all currently held w/w mutexes for * the given context and then wait for this contending lock to be available by * calling ww_mutex_lock_slow. Alternatively callers can opt to not acquire this * lock and proceed with trying to acquire further w/w mutexes (e.g. when * scanning through lru lists trying to free resources). * * The mutex must later on be released by the same task that * acquired it. The task may not exit without first unlocking the mutex. Also, * kernel memory where the mutex resides must not be freed with the mutex still * locked. The mutex must first be initialized (or statically defined) before it * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be * of the same w/w lock class as was used to initialize the acquire context. 
* * A mutex acquired with this function must be released with ww_mutex_unlock. */ extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible * @lock: the mutex to be acquired * @ctx: w/w acquire context * * Lock the w/w mutex exclusively for this task. * * Deadlocks within a given w/w class of locks are detected and handled with the * wait/die algorithm. If the lock isn't immediately available this function * will either sleep until it is (wait case). Or it selects the current context * for backing off by returning -EDEADLK (die case). Trying to acquire the * same lock with the same context twice is also detected and signalled by * returning -EALREADY. Returns 0 if the mutex was successfully acquired. If a * signal arrives while waiting for the lock then this function returns -EINTR. * * In the die case the caller must release all currently held w/w mutexes for * the given context and then wait for this contending lock to be available by * calling ww_mutex_lock_slow_interruptible. Alternatively callers can opt to * not acquire this lock and proceed with trying to acquire further w/w mutexes * (e.g. when scanning through lru lists trying to free resources). * * The mutex must later on be released by the same task that * acquired it. The task may not exit without first unlocking the mutex. Also, * kernel memory where the mutex resides must not be freed with the mutex still * locked. The mutex must first be initialized (or statically defined) before it * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be * of the same w/w lock class as was used to initialize the acquire context. * * A mutex acquired with this function must be released with ww_mutex_unlock. 
*/ extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex * @lock: the mutex to be acquired * @ctx: w/w acquire context * * Acquires a w/w mutex with the given context after a die case. This function * will sleep until the lock becomes available. * * The caller must have released all w/w mutexes already acquired with the * context and then call this function on the contended lock. * * Afterwards the caller may continue to (re)acquire the other w/w mutexes it * needs with ww_mutex_lock. Note that the -EALREADY return code from * ww_mutex_lock can be used to avoid locking this contended mutex twice. * * It is forbidden to call this function with any other w/w mutexes associated * with the context held. It is forbidden to call this on anything else than the * contending mutex. * * Note that the slowpath lock acquiring can also be done by calling * ww_mutex_lock directly. This function here is simply to help w/w mutex * locking code readability by clearly denoting the slowpath. */ static inline void ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); #endif ret = ww_mutex_lock(lock, ctx); (void)ret; } /** * ww_mutex_lock_slow_interruptible - slowpath acquiring of the w/w mutex, interruptible * @lock: the mutex to be acquired * @ctx: w/w acquire context * * Acquires a w/w mutex with the given context after a die case. This function * will sleep until the lock becomes available and returns 0 when the lock has * been acquired. If a signal arrives while waiting for the lock then this * function returns -EINTR. * * The caller must have released all w/w mutexes already acquired with the * context and then call this function on the contended lock. * * Afterwards the caller may continue to (re)acquire the other w/w mutexes it * needs with ww_mutex_lock. 
Note that the -EALREADY return code from * ww_mutex_lock can be used to avoid locking this contended mutex twice. * * It is forbidden to call this function with any other w/w mutexes associated * with the given context held. It is forbidden to call this on anything else * than the contending mutex. * * Note that the slowpath lock acquiring can also be done by calling * ww_mutex_lock_interruptible directly. This function here is simply to help * w/w mutex locking code readability by clearly denoting the slowpath. */ static inline int __must_check ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); #endif return ww_mutex_lock_interruptible(lock, ctx); } extern void ww_mutex_unlock(struct ww_mutex *lock); extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /*** * ww_mutex_destroy - mark a w/w mutex unusable * @lock: the mutex to be destroyed * * This function marks the mutex uninitialized, and any subsequent * use of the mutex is forbidden. The mutex must not be locked when * this function is called. */ static inline void ww_mutex_destroy(struct ww_mutex *lock) { #ifndef CONFIG_PREEMPT_RT mutex_destroy(&lock->base); #endif } /** * ww_mutex_is_locked - is the w/w mutex locked * @lock: the mutex to be queried * * Returns 1 if the mutex is locked, 0 if unlocked. */ static inline bool ww_mutex_is_locked(struct ww_mutex *lock) { return ww_mutex_base_is_locked(&lock->base); } #endif |
1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 | // SPDX-License-Identifier: GPL-2.0-only /* 
* IEEE 802.15.4 scanning management * * Copyright (C) 2021 Qorvo US, Inc * Authors: * - David Girault <david.girault@qorvo.com> * - Miquel Raynal <miquel.raynal@bootlin.com> */ #include <linux/module.h> #include <linux/rtnetlink.h> #include <net/mac802154.h> #include "ieee802154_i.h" #include "driver-ops.h" #include "../ieee802154/nl802154.h" #define IEEE802154_BEACON_MHR_SZ 13 #define IEEE802154_BEACON_PL_SZ 4 #define IEEE802154_MAC_CMD_MHR_SZ 23 #define IEEE802154_MAC_CMD_PL_SZ 1 #define IEEE802154_BEACON_SKB_SZ (IEEE802154_BEACON_MHR_SZ + \ IEEE802154_BEACON_PL_SZ) #define IEEE802154_MAC_CMD_SKB_SZ (IEEE802154_MAC_CMD_MHR_SZ + \ IEEE802154_MAC_CMD_PL_SZ) /* mac802154_scan_cleanup_locked() must be called upon scan completion or abort. * - Completions are asynchronous, not locked by the rtnl and decided by the * scan worker. * - Aborts are decided by userspace, and locked by the rtnl. * * Concurrent modifications to the PHY, the interfaces or the hardware are in * general prevented by the rtnl. So in most cases we don't need additional * protection. * * However, the scan worker gets triggered without anybody noticing and thus we * must ensure the presence of the devices as well as data consistency: * - The sub-interface and device driver module get both their reference * counters incremented whenever we start a scan, so they cannot disappear * during operation. * - Data consistency is achieved by the use of rcu protected pointers. 
*/ static int mac802154_scan_cleanup_locked(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata, bool aborted) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct wpan_phy *wpan_phy = local->phy; struct cfg802154_scan_request *request; u8 arg; /* Prevent any further use of the scan request */ clear_bit(IEEE802154_IS_SCANNING, &local->ongoing); cancel_delayed_work(&local->scan_work); request = rcu_replace_pointer(local->scan_req, NULL, 1); if (!request) return 0; kvfree_rcu_mightsleep(request); /* Advertize first, while we know the devices cannot be removed */ if (aborted) arg = NL802154_SCAN_DONE_REASON_ABORTED; else arg = NL802154_SCAN_DONE_REASON_FINISHED; nl802154_scan_done(wpan_phy, wpan_dev, arg); /* Cleanup software stack */ ieee802154_mlme_op_post(local); /* Set the hardware back in its original state */ drv_set_channel(local, wpan_phy->current_page, wpan_phy->current_channel); ieee802154_configure_durations(wpan_phy, wpan_phy->current_page, wpan_phy->current_channel); drv_stop(local); synchronize_net(); sdata->required_filtering = sdata->iface_default_filtering; drv_start(local, sdata->required_filtering, &local->addr_filt); return 0; } int mac802154_abort_scan_locked(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata) { ASSERT_RTNL(); if (!mac802154_is_scanning(local)) return -ESRCH; return mac802154_scan_cleanup_locked(local, sdata, true); } static unsigned int mac802154_scan_get_channel_time(u8 duration_order, u8 symbol_duration) { u64 base_super_frame_duration = (u64)symbol_duration * IEEE802154_SUPERFRAME_PERIOD * IEEE802154_SLOT_PERIOD; return usecs_to_jiffies(base_super_frame_duration * (BIT(duration_order) + 1)); } static void mac802154_flush_queued_beacons(struct ieee802154_local *local) { struct cfg802154_mac_pkt *mac_pkt, *tmp; list_for_each_entry_safe(mac_pkt, tmp, &local->rx_beacon_list, node) { list_del(&mac_pkt->node); kfree_skb(mac_pkt->skb); kfree(mac_pkt); } } static void 
mac802154_scan_get_next_channel(struct ieee802154_local *local, struct cfg802154_scan_request *scan_req, u8 *channel) { (*channel)++; *channel = find_next_bit((const unsigned long *)&scan_req->channels, IEEE802154_MAX_CHANNEL + 1, *channel); } static int mac802154_scan_find_next_chan(struct ieee802154_local *local, struct cfg802154_scan_request *scan_req, u8 page, u8 *channel) { mac802154_scan_get_next_channel(local, scan_req, channel); if (*channel > IEEE802154_MAX_CHANNEL) return -EINVAL; return 0; } static int mac802154_scan_prepare_beacon_req(struct ieee802154_local *local) { memset(&local->scan_beacon_req, 0, sizeof(local->scan_beacon_req)); local->scan_beacon_req.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; local->scan_beacon_req.mhr.fc.dest_addr_mode = IEEE802154_SHORT_ADDRESSING; local->scan_beacon_req.mhr.fc.version = IEEE802154_2003_STD; local->scan_beacon_req.mhr.fc.source_addr_mode = IEEE802154_NO_ADDRESSING; local->scan_beacon_req.mhr.dest.mode = IEEE802154_ADDR_SHORT; local->scan_beacon_req.mhr.dest.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); local->scan_beacon_req.mhr.dest.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); local->scan_beacon_req.mac_pl.cmd_id = IEEE802154_CMD_BEACON_REQ; return 0; } static int mac802154_transmit_beacon_req(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata) { struct sk_buff *skb; int ret; skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ, GFP_KERNEL); if (!skb) return -ENOBUFS; skb->dev = sdata->dev; ret = ieee802154_mac_cmd_push(skb, &local->scan_beacon_req, NULL, 0); if (ret) { kfree_skb(skb); return ret; } return ieee802154_mlme_tx(local, sdata, skb); } void mac802154_scan_worker(struct work_struct *work) { struct ieee802154_local *local = container_of(work, struct ieee802154_local, scan_work.work); struct cfg802154_scan_request *scan_req; struct ieee802154_sub_if_data *sdata; unsigned int scan_duration = 0; struct wpan_phy *wpan_phy; u8 scan_req_duration; u8 page, channel; int ret; /* Ensure the 
device receiver is turned off when changing channels * because there is no atomic way to change the channel and know on * which one a beacon might have been received. */ drv_stop(local); synchronize_net(); mac802154_flush_queued_beacons(local); rcu_read_lock(); scan_req = rcu_dereference(local->scan_req); if (unlikely(!scan_req)) { rcu_read_unlock(); return; } sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(scan_req->wpan_dev); /* Wait an arbitrary amount of time in case we cannot use the device */ if (local->suspended || !ieee802154_sdata_running(sdata)) { rcu_read_unlock(); queue_delayed_work(local->mac_wq, &local->scan_work, msecs_to_jiffies(1000)); return; } wpan_phy = scan_req->wpan_phy; scan_req_duration = scan_req->duration; /* Look for the next valid chan */ page = local->scan_page; channel = local->scan_channel; do { ret = mac802154_scan_find_next_chan(local, scan_req, page, &channel); if (ret) { rcu_read_unlock(); goto end_scan; } } while (!ieee802154_chan_is_valid(scan_req->wpan_phy, page, channel)); rcu_read_unlock(); /* Bypass the stack on purpose when changing the channel */ rtnl_lock(); ret = drv_set_channel(local, page, channel); rtnl_unlock(); if (ret) { dev_err(&sdata->dev->dev, "Channel change failure during scan, aborting (%d)\n", ret); goto end_scan; } local->scan_page = page; local->scan_channel = channel; rtnl_lock(); ret = drv_start(local, IEEE802154_FILTERING_3_SCAN, &local->addr_filt); rtnl_unlock(); if (ret) { dev_err(&sdata->dev->dev, "Restarting failure after channel change, aborting (%d)\n", ret); goto end_scan; } if (scan_req->type == NL802154_SCAN_ACTIVE) { ret = mac802154_transmit_beacon_req(local, sdata); if (ret) dev_err(&sdata->dev->dev, "Error when transmitting beacon request (%d)\n", ret); } ieee802154_configure_durations(wpan_phy, page, channel); scan_duration = mac802154_scan_get_channel_time(scan_req_duration, wpan_phy->symbol_duration); dev_dbg(&sdata->dev->dev, "Scan page %u channel %u for %ums\n", page, channel, 
jiffies_to_msecs(scan_duration)); queue_delayed_work(local->mac_wq, &local->scan_work, scan_duration); return; end_scan: rtnl_lock(); mac802154_scan_cleanup_locked(local, sdata, false); rtnl_unlock(); } int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata, struct cfg802154_scan_request *request) { struct ieee802154_local *local = sdata->local; ASSERT_RTNL(); if (mac802154_is_scanning(local)) return -EBUSY; if (request->type != NL802154_SCAN_PASSIVE && request->type != NL802154_SCAN_ACTIVE) return -EOPNOTSUPP; /* Store scanning parameters */ rcu_assign_pointer(local->scan_req, request); /* Software scanning requires to set promiscuous mode, so we need to * pause the Tx queue during the entire operation. */ ieee802154_mlme_op_pre(local); sdata->required_filtering = IEEE802154_FILTERING_3_SCAN; local->scan_page = request->page; local->scan_channel = -1; set_bit(IEEE802154_IS_SCANNING, &local->ongoing); if (request->type == NL802154_SCAN_ACTIVE) mac802154_scan_prepare_beacon_req(local); nl802154_scan_started(request->wpan_phy, request->wpan_dev); queue_delayed_work(local->mac_wq, &local->scan_work, 0); return 0; } int mac802154_process_beacon(struct ieee802154_local *local, struct sk_buff *skb, u8 page, u8 channel) { struct ieee802154_beacon_hdr *bh = (void *)skb->data; struct ieee802154_addr *src = &mac_cb(skb)->source; struct cfg802154_scan_request *scan_req; struct ieee802154_coord_desc desc; if (skb->len != sizeof(*bh)) return -EINVAL; if (unlikely(src->mode == IEEE802154_ADDR_NONE)) return -EINVAL; dev_dbg(&skb->dev->dev, "BEACON received on page %u channel %u\n", page, channel); memcpy(&desc.addr, src, sizeof(desc.addr)); desc.page = page; desc.channel = channel; desc.link_quality = mac_cb(skb)->lqi; desc.superframe_spec = get_unaligned_le16(skb->data); desc.gts_permit = bh->gts_permit; trace_802154_scan_event(&desc); rcu_read_lock(); scan_req = rcu_dereference(local->scan_req); if (likely(scan_req)) nl802154_scan_event(scan_req->wpan_phy, 
scan_req->wpan_dev, &desc); rcu_read_unlock(); return 0; } static int mac802154_transmit_beacon(struct ieee802154_local *local, struct wpan_dev *wpan_dev) { struct cfg802154_beacon_request *beacon_req; struct ieee802154_sub_if_data *sdata; struct sk_buff *skb; int ret; /* Update the sequence number */ local->beacon.mhr.seq = atomic_inc_return(&wpan_dev->bsn) & 0xFF; skb = alloc_skb(IEEE802154_BEACON_SKB_SZ, GFP_KERNEL); if (!skb) return -ENOBUFS; rcu_read_lock(); beacon_req = rcu_dereference(local->beacon_req); if (unlikely(!beacon_req)) { rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(beacon_req->wpan_dev); skb->dev = sdata->dev; rcu_read_unlock(); ret = ieee802154_beacon_push(skb, &local->beacon); if (ret) { kfree_skb(skb); return ret; } /* Using the MLME transmission helper for sending beacons is a bit * overkill because we do not really care about the final outcome. * * Even though, going through the whole net stack with a regular * dev_queue_xmit() is not relevant either because we want beacons to be * sent "now" rather than go through the whole net stack scheduling * (qdisc & co). * * Finally, using ieee802154_subif_start_xmit() would only be an option * if we had a generic transmit helper which would acquire the * HARD_TX_LOCK() to prevent buffer handling conflicts with regular * packets. * * So for now we keep it simple and send beacons with our MLME helper, * even if it stops the ieee802154 queue entirely during these * transmissions, wich anyway does not have a huge impact on the * performances given the current design of the stack. 
*/ return ieee802154_mlme_tx(local, sdata, skb); } void mac802154_beacon_worker(struct work_struct *work) { struct ieee802154_local *local = container_of(work, struct ieee802154_local, beacon_work.work); struct cfg802154_beacon_request *beacon_req; struct ieee802154_sub_if_data *sdata; struct wpan_dev *wpan_dev; u8 interval; int ret; rcu_read_lock(); beacon_req = rcu_dereference(local->beacon_req); if (unlikely(!beacon_req)) { rcu_read_unlock(); return; } sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(beacon_req->wpan_dev); /* Wait an arbitrary amount of time in case we cannot use the device */ if (local->suspended || !ieee802154_sdata_running(sdata)) { rcu_read_unlock(); queue_delayed_work(local->mac_wq, &local->beacon_work, msecs_to_jiffies(1000)); return; } wpan_dev = beacon_req->wpan_dev; interval = beacon_req->interval; rcu_read_unlock(); dev_dbg(&sdata->dev->dev, "Sending beacon\n"); ret = mac802154_transmit_beacon(local, wpan_dev); if (ret) dev_err(&sdata->dev->dev, "Beacon could not be transmitted (%d)\n", ret); if (interval < IEEE802154_ACTIVE_SCAN_DURATION) queue_delayed_work(local->mac_wq, &local->beacon_work, local->beacon_interval); } int mac802154_stop_beacons_locked(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct cfg802154_beacon_request *request; ASSERT_RTNL(); if (!mac802154_is_beaconing(local)) return -ESRCH; clear_bit(IEEE802154_IS_BEACONING, &local->ongoing); cancel_delayed_work(&local->beacon_work); request = rcu_replace_pointer(local->beacon_req, NULL, 1); if (!request) return 0; kvfree_rcu_mightsleep(request); nl802154_beaconing_done(wpan_dev); return 0; } int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, struct cfg802154_beacon_request *request) { struct ieee802154_local *local = sdata->local; ASSERT_RTNL(); if (mac802154_is_beaconing(local)) mac802154_stop_beacons_locked(local, sdata); /* Store beaconing parameters */ 
rcu_assign_pointer(local->beacon_req, request); set_bit(IEEE802154_IS_BEACONING, &local->ongoing); memset(&local->beacon, 0, sizeof(local->beacon)); local->beacon.mhr.fc.type = IEEE802154_FC_TYPE_BEACON; local->beacon.mhr.fc.security_enabled = 0; local->beacon.mhr.fc.frame_pending = 0; local->beacon.mhr.fc.ack_request = 0; local->beacon.mhr.fc.intra_pan = 0; local->beacon.mhr.fc.dest_addr_mode = IEEE802154_NO_ADDRESSING; local->beacon.mhr.fc.version = IEEE802154_2003_STD; local->beacon.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; atomic_set(&request->wpan_dev->bsn, -1); local->beacon.mhr.source.mode = IEEE802154_ADDR_LONG; local->beacon.mhr.source.pan_id = request->wpan_dev->pan_id; local->beacon.mhr.source.extended_addr = request->wpan_dev->extended_addr; local->beacon.mac_pl.beacon_order = request->interval; if (request->interval <= IEEE802154_MAX_SCAN_DURATION) local->beacon.mac_pl.superframe_order = request->interval; local->beacon.mac_pl.final_cap_slot = 0xf; local->beacon.mac_pl.battery_life_ext = 0; /* TODO: Fill this field with the coordinator situation in the network */ local->beacon.mac_pl.pan_coordinator = 1; local->beacon.mac_pl.assoc_permit = 1; if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION) return 0; /* Start the beacon work */ local->beacon_interval = mac802154_scan_get_channel_time(request->interval, request->wpan_phy->symbol_duration); queue_delayed_work(local->mac_wq, &local->beacon_work, 0); return 0; } |
95 1 3 100 3 14 352 68 364 932 6799 1305 720 384 12 5 2 80 833 62 4 23 395 3 8 395 10 146 16 24 401 373 122 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H #include <linux/rculist.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/jobctl.h> #include <linux/sched/task.h> #include <linux/cred.h> #include <linux/refcount.h> #include <linux/posix-timers.h> #include <linux/mm_types.h> #include <asm/ptrace.h> /* * Types defining task->signal and task->sighand and APIs using them: */ struct sighand_struct { spinlock_t siglock; refcount_t count; wait_queue_head_t signalfd_wqh; struct k_sigaction action[_NSIG]; }; /* * Per-process accounting stats: */ struct pacct_struct { int ac_flag; long ac_exitcode; unsigned long ac_mem; u64 ac_utime, ac_stime; unsigned long ac_minflt, ac_majflt; }; struct cpu_itimer { u64 expires; u64 incr; }; /* * This is the atomic variant of task_cputime, which 
can be used for * storing and updating task_cputime statistics without locking. */ struct task_cputime_atomic { atomic64_t utime; atomic64_t stime; atomic64_t sum_exec_runtime; }; #define INIT_CPUTIME_ATOMIC \ (struct task_cputime_atomic) { \ .utime = ATOMIC64_INIT(0), \ .stime = ATOMIC64_INIT(0), \ .sum_exec_runtime = ATOMIC64_INIT(0), \ } /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; }; struct multiprocess_signals { sigset_t signal; struct hlist_node node; }; struct core_thread { struct task_struct *task; struct core_thread *next; }; struct core_state { atomic_t nr_threads; struct core_thread dumper; struct completion startup; }; /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always * implies a shared sighand_struct, so locking * sighand_struct is always a proper superset of * the locking of signal_struct. 
*/ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; int quick_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; /* For collecting multiprocess signals during fork */ struct hlist_head multiprocess; /* thread group exit support */ int group_exit_code; /* notify group_exec_task when notify_count is less or equal to 0 */ int notify_count; struct task_struct *group_exec_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ struct core_state *core_state; /* coredumping support */ /* * PR_SET_CHILD_SUBREAPER marks a process, like a service * manager, to re-parent orphan (double-forking) child processes * to this process instead of 'init'. The service manager is * able to receive SIGCHLD signals and is able to investigate * the process until it calls wait(). All children of this * process will inherit a flag if they should look for a * child_subreaper process at exit. */ unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; ktime_t it_real_incr; /* * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these * values are defined to 0 and 1 respectively */ struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. * See thread_group_cputimer(), et al, for details. */ struct thread_group_cputimer cputimer; #endif /* Empty if CONFIG_POSIX_TIMERS=n */ struct posix_cputimers posix_cputimers; /* PID/PID hash table linkage. 
*/ struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif struct pid *tty_old_pgrp; /* boolean value for session group leader */ int leader; struct tty_struct *tty; /* NULL if no tty */ #ifdef CONFIG_SCHED_AUTOGROUP struct autogroup *autogroup; #endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; u64 utime, stime, cutime, cstime; u64 gtime; u64 cgtime; struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; unsigned long maxrss, cmaxrss; struct task_io_accounting ioac; /* * Cumulative ns of schedule CPU time for dead threads in the * group, not including a zombie group leader, (This only differs * from jiffies_to_ns(utime + stime) if sched_clock uses something * other than jiffies.) */ unsigned long long sum_sched_runtime; /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs * to get both rlim_cur and rlim_max atomically, and either one * alone is a single word that can safely be read normally. * getrlimit/setrlimit use task_lock(current->group_leader) to * protect this instead of the siglock, because they really * have no need to disable irqs. 
*/ struct rlimit rlim[RLIM_NLIMITS]; #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif #ifdef CONFIG_TASKSTATS struct taskstats *stats; #endif #ifdef CONFIG_AUDIT unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif /* * Thread is the potential origin of an oom condition; kill first on * oom */ bool oom_flag_origin; short oom_score_adj; /* OOM kill score adjustment */ short oom_score_adj_min; /* OOM kill score adjustment min value. * Only settable by CAP_SYS_RESOURCE. */ struct mm_struct *oom_mm; /* recorded mm when the thread group got * killed by the oom killer */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations * (notably. ptrace) * Deprecated do not use in new code. * Use exec_update_lock instead. */ struct rw_semaphore exec_update_lock; /* Held while task_struct is * being updated during exec, * and may have inconsistent * permissions. */ } __randomize_layout; /* * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ /* * Pending notifications to parent. 
*/ #define SIGNAL_CLD_STOPPED 0x00000010 #define SIGNAL_CLD_CONTINUED 0x00000020 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ #define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ SIGNAL_STOP_CONTINUED) static inline void signal_set_stop_flags(struct signal_struct *sig, unsigned int flags) { WARN_ON(sig->flags & SIGNAL_GROUP_EXIT); sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { struct task_struct *task = current; kernel_siginfo_t __info; enum pid_type __type; int ret; spin_lock_irq(&task->sighand->siglock); ret = dequeue_signal(task, &task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; } static inline void kernel_signal_stop(void) { spin_lock_irq(&current->sighand->siglock); if (current->jobctl & JOBCTL_STOP_DEQUEUED) { current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); } spin_unlock_irq(&current->sighand->siglock); schedule(); } int force_sig_fault_to_task(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_fault(int sig, int code, void __user *addr); int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); int send_sig_perf(void __user *addr, u32 type, u64 sig_data); int force_sig_ptrace_errno_trap(int errno, void __user *addr); int force_sig_fault_trapno(int sig, int 
code, void __user *addr, int trapno); int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno, struct task_struct *t); int force_sig_seccomp(int syscall, int reason, bool force_coredump); extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern void force_sigsegv(int sig); extern int force_sig_info(struct kernel_siginfo *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *, const struct cred *); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); extern void force_fatal_sig(int); extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) { clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); } /* * Returns 'true' if kick_process() is needed to force a transition from * user -> kernel to guarantee expedient run of TWA_SIGNAL based task_work. */ static inline bool __set_notify_signal(struct task_struct *task) { return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE); } /* * Called to break out of interruptible wait loops, and enter the * exit_to_user_mode_loop(). 
*/ static inline void set_notify_signal(struct task_struct *task) { if (__set_notify_signal(task)) kick_process(task); } static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); return -ERESTARTNOINTR; } static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } static inline int signal_pending(struct task_struct *p) { /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops * so that notify signal callbacks can be processed. */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; return task_sigpending(p); } static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); } static inline int fatal_signal_pending(struct task_struct *p) { return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(unsigned int state, struct task_struct *p) { if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) return 0; if (!signal_pending(p)) return 0; return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } /* * This should only be used in fault handlers to decide whether we * should stop the current fault routine to handle the signals * instead, especially with the case where we've got interrupted with * a VM_FAULT_RETRY. */ static inline bool fault_signal_pending(vm_fault_t fault_flags, struct pt_regs *regs) { return unlikely((fault_flags & VM_FAULT_RETRY) && (fatal_signal_pending(current) || (user_mode(regs) && signal_pending(current)))); } /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. * This is required every time the blocked sigset_t changes. * callers must hold sighand->siglock. 
*/ extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); extern void calculate_sigpending(void); extern void signal_wake_up_state(struct task_struct *t, unsigned int state); static inline void signal_wake_up(struct task_struct *t, bool fatal) { unsigned int state = 0; if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); state = TASK_WAKEKILL | __TASK_TRACED; } signal_wake_up_state(t, state); } static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) { unsigned int state = 0; if (resume) { t->jobctl &= ~JOBCTL_TRACED; state = __TASK_TRACED; } signal_wake_up_state(t, state); } void task_join_group_stop(struct task_struct *task); #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on * SMP architectures because they require atomic operations. */ /** * set_restore_sigmask() - make sure saved_sigmask processing gets done * * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code * will run before returning to user mode, to process the flag. For * all callers, TIF_SIGPENDING is already set or it's no harm to set * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the * arch code will notice on return to user mode, in case those bits * are scarce. We set TIF_SIGPENDING here to ensure that the arch * signal code always gets run when TIF_RESTORE_SIGMASK is set. 
*/ static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return test_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_and_clear_restore_sigmask(void) { return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); } #else /* TIF_RESTORE_SIGMASK */ /* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ static inline void set_restore_sigmask(void) { current->restore_sigmask = true; } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { task->restore_sigmask = false; } static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; } static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return task->restore_sigmask; } static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) return false; current->restore_sigmask = false; return true; } #endif static inline void restore_saved_sigmask(void) { if (test_and_clear_restore_sigmask()) __set_current_blocked(&current->saved_sigmask); } extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } static inline sigset_t *sigmask_to_save(void) { sigset_t *res = &current->blocked; if (unlikely(test_restore_sigmask())) res = &current->saved_sigmask; return res; } static inline int kill_cad_pid(int sig, int priv) { return kill_pid(cad_pid, sig, priv); } /* These can 
be the second arg to send_sig_info/send_group_sig_info. */ #define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0) #define SEND_SIG_PRIV ((struct kernel_siginfo *) 1) static inline int __on_sig_stack(unsigned long sp) { #ifdef CONFIG_STACK_GROWSUP return sp >= current->sas_ss_sp && sp - current->sas_ss_sp < current->sas_ss_size; #else return sp > current->sas_ss_sp && sp - current->sas_ss_sp <= current->sas_ss_size; #endif } /* * True if we are on the alternate signal stack. */ static inline int on_sig_stack(unsigned long sp) { /* * If the signal stack is SS_AUTODISARM then, by construction, we * can't be on the signal stack unless user code deliberately set * SS_AUTODISARM when we were already on it. * * This improves reliability: if user state gets corrupted such that * the stack pointer points very close to the end of the signal stack, * then this check will enable the signal to be handled anyway. */ if (current->sas_ss_flags & SS_AUTODISARM) return 0; return __on_sig_stack(sp); } static inline int sas_ss_flags(unsigned long sp) { if (!current->sas_ss_size) return SS_DISABLE; return on_sig_stack(sp) ? SS_ONSTACK : 0; } static inline void sas_ss_reset(struct task_struct *p) { p->sas_ss_sp = 0; p->sas_ss_size = 0; p->sas_ss_flags = SS_DISABLE; } static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) { if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) #ifdef CONFIG_STACK_GROWSUP return current->sas_ss_sp; #else return current->sas_ss_sp + current->sas_ss_size; #endif return sp; } extern void __cleanup_sighand(struct sighand_struct *); extern void flush_itimer_signals(void); #define tasklist_empty() \ list_empty(&init_task.tasks) #define next_task(p) \ list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) extern bool current_is_single_threaded(void); /* * Without tasklist/siglock it is only rcu-safe if g can't exit/exec, * otherwise next_thread(t) will never reach g after list_del_rcu(g). */ #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) #define __for_each_thread(signal, t) \ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) /* Careful: this is a double loop, 'break' won't work as expected. */ #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) typedef int (*proc_visitor)(struct task_struct *p, void *data); void walk_process_tree(struct task_struct *top, proc_visitor, void *); static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { struct pid *pid; if (type == PIDTYPE_PID) pid = task_pid(task); else pid = task->signal->pids[type]; return pid; } static inline struct pid *task_tgid(struct task_struct *task) { return task->signal->pids[PIDTYPE_TGID]; } /* * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. 
*/ static inline struct pid *task_pgrp(struct task_struct *task) { return task->signal->pids[PIDTYPE_PGID]; } static inline struct pid *task_session(struct task_struct *task) { return task->signal->pids[PIDTYPE_SID]; } static inline int get_nr_threads(struct task_struct *task) { return task->signal->nr_threads; } static inline bool thread_group_leader(struct task_struct *p) { return p->exit_signal >= 0; } static inline bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { return p1->signal == p2->signal; } /* * returns NULL if p is the last thread in the thread group */ static inline struct task_struct *__next_thread(struct task_struct *p) { return list_next_or_null_rcu(&p->signal->thread_head, &p->thread_node, struct task_struct, thread_node); } static inline struct task_struct *next_thread(struct task_struct *p) { return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { return thread_group_leader(p) && list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) extern bool thread_group_exited(struct pid *pid); extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); static inline struct sighand_struct *lock_task_sighand(struct task_struct *task, unsigned long *flags) { struct sighand_struct *ret; ret = __lock_task_sighand(task, flags); (void)__cond_lock(&task->sighand->siglock, ret); return ret; } static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) { spin_unlock_irqrestore(&task->sighand->siglock, *flags); } #ifdef CONFIG_LOCKDEP extern void lockdep_assert_task_sighand_held(struct task_struct *task); #else static inline void lockdep_assert_task_sighand_held(struct task_struct *task) { } #endif static inline unsigned long task_rlimit(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_cur); 
} static inline unsigned long task_rlimit_max(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) { return task_rlimit(current, limit); } static inline unsigned long rlimit_max(unsigned int limit) { return task_rlimit_max(current, limit); } #endif /* _LINUX_SCHED_SIGNAL_H */ |
110 110 93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | #include <linux/dcache.h> #include "internal.h" unsigned name_to_int(const struct qstr *qstr) { const char *name = qstr->name; int len = qstr->len; unsigned n = 0; if (len > 1 && *name == '0') goto out; do { unsigned c = *name++ - '0'; if (c > 9) goto out; if (n >= (~0U-9)/10) goto out; n *= 10; n += c; } while (--len > 0); return n; out: return ~0U; } |
22 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 | /* * Copyright (c) 2004 The Regents of the University of Michigan. * Copyright (c) 2012 Jeff Layton <jlayton@redhat.com> * All rights reserved. 
* * Andy Adamson <andros@citi.umich.edu> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */

#include <crypto/hash.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfsd/cld.h>

#include "nfsd.h"
#include "state.h"
#include "vfs.h"
#include "netns.h"

#define NFSDDBG_FACILITY                NFSDDBG_PROC

/* Declarations */

/*
 * Table of callbacks implemented by each client tracking backend.
 *
 * @version is copied into the cm_vers field of upcall messages and
 * @msglen is used as the size of upcall/downcall messages by the pipe
 * helpers later in this file.
 */
struct nfsd4_client_tracking_ops {
	int (*init)(struct net *);
	void (*exit)(struct net *);
	void (*create)(struct nfs4_client *);
	void (*remove)(struct nfs4_client *);
	int (*check)(struct nfs4_client *);
	void (*grace_done)(struct nfsd_net *);
	uint8_t version;
	size_t msglen;
};

static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;

/* Globals */
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";

/*
 * Install credentials whose fsuid/fsgid are the global root IDs.
 *
 * The credentials that were in effect before the override are handed
 * back through @original_creds so the caller can restore them with
 * nfs4_reset_creds().
 *
 * Returns 0 on success or -ENOMEM if prepare_creds() fails.
 */
static int
nfs4_save_creds(const struct cred **original_creds)
{
	struct cred *new;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	new->fsuid = GLOBAL_ROOT_UID;
	new->fsgid = GLOBAL_ROOT_GID;
	*original_creds = override_creds(new);
	put_cred(new);
	return 0;
}

/* Restore the credentials saved by nfs4_save_creds(). */
static void
nfs4_reset_creds(const struct cred *original)
{
	revert_creds(original);
}

/*
 * Encode the 16 bytes at @md5 as 32 lowercase hexadecimal digits in
 * @out, followed by a terminating NUL (33 bytes written in total).
 */
static void
md5_to_hex(char *out, char *md5)
{
	int i;

	for (i=0; i<16; i++) {
		unsigned char c = md5[i];

		/* 'a'-'9'-1 skips the ASCII gap between '9' and 'a' */
		*out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
		*out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
	}
	*out = '\0';
}

/*
 * Derive the recovery directory name for a client: the "md5" digest of
 * @clname, hex-encoded into @dname by md5_to_hex().
 *
 * Returns 0 on success or a negative errno.
 */
static int
nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
{
	struct xdr_netobj cksum;
	struct crypto_shash *tfm;
	int status;

	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
			clname->len, clname->data);
	tfm = crypto_alloc_shash("md5", 0, 0);
	if (IS_ERR(tfm)) {
		status = PTR_ERR(tfm);
		goto out_no_tfm;
	}

	cksum.len = crypto_shash_digestsize(tfm);
	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
	if (cksum.data == NULL) {
		status = -ENOMEM;
		/* kfree(NULL) at "out" is harmless; tfm still gets freed */
 		goto out;
	}

	status =
crypto_shash_tfm_digest(tfm, clname->data, clname->len, cksum.data); if (status) goto out; md5_to_hex(dname, cksum.data); status = 0; out: kfree(cksum.data); crypto_free_shash(tfm); out_no_tfm: return status; } /* * If we had an error generating the recdir name for the legacy tracker * then warn the admin. If the error doesn't appear to be transient, * then disable recovery tracking. */ static void legacy_recdir_name_error(struct nfs4_client *clp, int error) { printk(KERN_ERR "NFSD: unable to generate recoverydir " "name (%d).\n", error); /* * if the algorithm just doesn't exist, then disable the recovery * tracker altogether. The crypto libs will generally return this if * FIPS is enabled as well. */ if (error == -ENOENT) { printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " "Reboot recovery will not function correctly!\n"); nfsd4_client_tracking_exit(clp->net); } } static void __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp, const char *dname, int len, struct nfsd_net *nn) { struct xdr_netobj name; struct xdr_netobj princhash = { .len = 0, .data = NULL }; struct nfs4_client_reclaim *crp; name.data = kmemdup(dname, len, GFP_KERNEL); if (!name.data) { dprintk("%s: failed to allocate memory for name.data!\n", __func__); return; } name.len = len; crp = nfs4_client_to_reclaim(name, princhash, nn); if (!crp) { kfree(name.data); return; } crp->cr_clp = clp; } static void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char dname[HEXDIR_LEN]; struct dentry *dir, *dentry; int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; if (!nn->rec_file) return; status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) return legacy_recdir_name_error(clp, status); status = nfs4_save_creds(&original_cred); if (status < 0) return; status = mnt_want_write_file(nn->rec_file); if (status) goto out_creds; dir = nn->rec_file->f_path.dentry; /* 
lock the parent */ inode_lock(d_inode(dir)); dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; } if (d_really_is_positive(dentry)) /* * In the 4.1 case, where we're called from * reclaim_complete(), records from the previous reboot * may still be left, so this is OK. * * In the 4.0 case, we should never get here; but we may * as well be forgiving and just succeed silently. */ goto out_put; status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); out_put: dput(dentry); out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { if (nn->in_grace) __nfsd4_create_reclaim_record_grace(clp, dname, HEXDIR_LEN, nn); vfs_fsync(nn->rec_file, 0); } else { printk(KERN_ERR "NFSD: failed to write recovery record" " (err %d); please check that %s exists" " and is writeable", status, user_recovery_dirname); } mnt_drop_write_file(nn->rec_file); out_creds: nfs4_reset_creds(original_cred); } typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *); struct name_list { char name[HEXDIR_LEN]; struct list_head list; }; struct nfs4_dir_ctx { struct dir_context ctx; struct list_head names; }; static bool nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct nfs4_dir_ctx *ctx = container_of(__ctx, struct nfs4_dir_ctx, ctx); struct name_list *entry; if (namlen != HEXDIR_LEN - 1) return true; entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); if (entry == NULL) return false; memcpy(entry->name, name, HEXDIR_LEN - 1); entry->name[HEXDIR_LEN - 1] = '\0'; list_add(&entry->list, &ctx->names); return true; } static int nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; struct nfs4_dir_ctx ctx = { .ctx.actor = nfsd4_build_namelist, .names = LIST_HEAD_INIT(ctx.names) }; struct name_list *entry, *tmp; int status; status = 
nfs4_save_creds(&original_cred);
	if (status < 0)
		return status;

	/* rewind so the walk always starts at the first entry */
	status = vfs_llseek(nn->rec_file, 0, SEEK_SET);
	if (status < 0) {
		nfs4_reset_creds(original_cred);
		return status;
	}

	status = iterate_dir(nn->rec_file, &ctx.ctx);
	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);

	/*
	 * Once status is non-zero the callback is no longer invoked, but
	 * the loop keeps running so that the collected names are freed.
	 */
	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
		if (!status) {
			struct dentry *dentry;
			dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
			if (IS_ERR(dentry)) {
				status = PTR_ERR(dentry);
				break;
			}
			status = f(dir, dentry, nn);
			dput(dentry);
		}
		list_del(&entry->list);
		kfree(entry);
	}
	inode_unlock(d_inode(dir));
	nfs4_reset_creds(original_cred);

	/* free whatever is left after the early break on lookup failure */
	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
		dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name);
		list_del(&entry->list);
		kfree(entry);
	}
	return status;
}

/*
 * Remove the directory @name (of length @namlen) from the recovery
 * directory.
 *
 * Returns 0 on success, -ENOENT if no such entry exists, or another
 * negative errno from the lookup or from vfs_rmdir().
 */
static int
nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
{
	struct dentry *dir, *dentry;
	int status;

	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);

	dir = nn->rec_file->f_path.dentry;
	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
	dentry = lookup_one_len(name, dir, namlen);
	if (IS_ERR(dentry)) {
		status = PTR_ERR(dentry);
		goto out_unlock;
	}
	status = -ENOENT;
	if (d_really_is_negative(dentry))
		goto out;
	status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
out:
	dput(dentry);
out_unlock:
	inode_unlock(d_inode(dir));
	return status;
}

/*
 * Drop the in-memory reclaim record matching the @len bytes at @dname,
 * if there is one. The name is copied only for the duration of the
 * lookup; allocation failure is logged (dprintk) and otherwise ignored.
 */
static void
__nfsd4_remove_reclaim_record_grace(const char *dname, int len,
		struct nfsd_net *nn)
{
	struct xdr_netobj name;
	struct nfs4_client_reclaim *crp;

	name.data = kmemdup(dname, len, GFP_KERNEL);
	if (!name.data) {
		dprintk("%s: failed to allocate memory for name.data!\n",
			__func__);
		return;
	}
	name.len = len;
	crp = nfsd4_find_reclaim_client(name, nn);
	kfree(name.data);
	if (crp)
		nfs4_remove_reclaim_record(crp, nn);
}

/*
 * Legacy tracker ->remove: delete the recovery directory entry that
 * belongs to @clp and clear its NFSD4_CLIENT_STABLE flag.
 *
 * Does nothing when no recovery directory is open or the client was
 * never marked stable.
 */
static void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
	const struct cred *original_cred;
	char dname[HEXDIR_LEN];
	int status;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if
(!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
		return;

	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
	if (status)
		return legacy_recdir_name_error(clp, status);

	status = mnt_want_write_file(nn->rec_file);
	if (status)
		goto out;
	/* the flag is cleared before the unlink is attempted */
	clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);

	status = nfs4_save_creds(&original_cred);
	if (status < 0)
		goto out_drop_write;

	status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
	nfs4_reset_creds(original_cred);
	if (status == 0) {
		vfs_fsync(nn->rec_file, 0);
		if (nn->in_grace)
			__nfsd4_remove_reclaim_record_grace(dname,
					HEXDIR_LEN, nn);
	}
out_drop_write:
	mnt_drop_write_file(nn->rec_file);
out:
	if (status)
		printk("NFSD: Failed to remove expired client state directory"
				" %.*s\n", HEXDIR_LEN, dname);
}

/*
 * nfsd4_list_rec_dir() callback: remove @child from the recovery
 * directory unless nfs4_has_reclaimed_state() reports state for it.
 *
 * Always returns 0 so that the directory walk carries on regardless of
 * individual failures.
 */
static int
purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
	int status;
	struct xdr_netobj name;

	if (child->d_name.len != HEXDIR_LEN - 1) {
		printk("%s: illegal name %pd in recovery directory\n",
				__func__, child);
		/* Keep trying; maybe the others are OK: */
		return 0;
	}
	name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
	if (!name.data) {
		dprintk("%s: failed to allocate memory for name.data!\n",
			__func__);
		goto out;
	}
	/* length includes the NUL appended by kmemdup_nul() */
	name.len = HEXDIR_LEN;
	if (nfs4_has_reclaimed_state(name, nn))
		goto out_free;

	status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child);
	if (status)
		printk("failed to remove client recovery directory %pd\n",
				child);
out_free:
	kfree(name.data);
out:
	/* Keep trying, success or failure: */
	return 0;
}

/*
 * Legacy tracker ->grace_done: leave grace (nn->in_grace = false),
 * purge recovery directory entries via purge_old(), then release the
 * in-memory reclaim records.
 */
static void
nfsd4_recdir_purge_old(struct nfsd_net *nn)
{
	int status;

	nn->in_grace = false;
	if (!nn->rec_file)
		return;
	status = mnt_want_write_file(nn->rec_file);
	if (status)
		goto out;
	status = nfsd4_list_rec_dir(purge_old, nn);
	if (status == 0)
		vfs_fsync(nn->rec_file, 0);
	mnt_drop_write_file(nn->rec_file);
out:
	/* reclaim records are released even when the purge failed */
	nfs4_release_reclaim(nn);
	if (status)
		printk("nfsd4: failed to purge old clients from recovery"
			" directory %pD\n", nn->rec_file);
}

/*
 * nfsd4_list_rec_dir() callback that turns one recovery directory
 * entry into an in-memory reclaim record; always returns 0.
 */
static int
load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
	struct xdr_netobj name;
	struct xdr_netobj princhash = { .len = 0, .data = NULL };

	if (child->d_name.len != HEXDIR_LEN - 1) {
		printk("%s: illegal name %pd in recovery directory\n",
				__func__, child);
		/* Keep trying; maybe the others are OK: */
		return 0;
	}
	name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
	if (!name.data) {
		dprintk("%s: failed to allocate memory for name.data!\n",
			__func__);
		goto out;
	}
	/* length includes the NUL appended by kmemdup_nul() */
	name.len = HEXDIR_LEN;
	/* the copy is freed here only when no reclaim record was created */
	if (!nfs4_client_to_reclaim(name, princhash, nn))
		kfree(name.data);
out:
	return 0;
}

/*
 * Load every entry of the recovery directory into the reclaim table
 * via load_recdir().
 *
 * Returns 0 when no recovery directory is open, otherwise the status
 * of nfsd4_list_rec_dir().
 */
static int
nfsd4_recdir_load(struct net *net) {
	int status;
	struct nfsd_net *nn =  net_generic(net, nfsd_net_id);

	if (!nn->rec_file)
		return 0;

	status = nfsd4_list_rec_dir(load_recdir, nn);
	if (status)
		printk("nfsd4: failed loading clients from recovery"
			" directory %pD\n", nn->rec_file);
	return status;
}

/*
 * Hold reference to the recovery directory.
 *
 * Opens user_recovery_dirname with root fs credentials and stores the
 * file in nn->rec_file; on success nn->in_grace is set to true.
 * Returns 0 or a negative errno (nn->rec_file is left NULL on failure).
 */

static int
nfsd4_init_recdir(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	const struct cred *original_cred;
	int status;

	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
			user_recovery_dirname);

	BUG_ON(nn->rec_file);

	status = nfs4_save_creds(&original_cred);
	if (status < 0) {
		printk("NFSD: Unable to change credentials to find recovery"
		       " directory: error %d\n",
		       status);
		return status;
	}

	nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
	if (IS_ERR(nn->rec_file)) {
		printk("NFSD: unable to find recovery directory %s\n",
				user_recovery_dirname);
		status = PTR_ERR(nn->rec_file);
		nn->rec_file = NULL;
	}

	nfs4_reset_creds(original_cred);
	if (!status)
		nn->in_grace = true;
	return status;
}

/* Drop the reference taken by nfsd4_init_recdir(), if any. */
static void
nfsd4_shutdown_recdir(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (!nn->rec_file)
		return;
	fput(nn->rec_file);
	nn->rec_file = NULL;
}

/*
 * Allocate the reclaim hash table (CLIENT_HASH_SIZE empty list heads)
 * and reset its size counter. Returns 0 or -ENOMEM.
 */
static int
nfs4_legacy_state_init(struct net *net)
{
	struct nfsd_net *nn = net_generic(net,
nfsd_net_id);
	int i;

	nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
						sizeof(struct list_head),
						GFP_KERNEL);
	if (!nn->reclaim_str_hashtbl)
		return -ENOMEM;

	for (i = 0; i < CLIENT_HASH_SIZE; i++)
		INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
	nn->reclaim_str_hashtbl_size = 0;

	return 0;
}

/*
 * Free the reclaim hash table array itself. Entries are not walked
 * here; nfsd4_legacy_tracking_exit() calls nfs4_release_reclaim()
 * first.
 */
static void
nfs4_legacy_state_shutdown(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	kfree(nn->reclaim_str_hashtbl);
}

/*
 * Open the recovery directory and load its entries; the directory
 * reference is dropped again if loading fails.
 */
static int
nfsd4_load_reboot_recovery_data(struct net *net)
{
	int status;

	status = nfsd4_init_recdir(net);
	if (status)
		return status;

	status = nfsd4_recdir_load(net);
	if (status)
		nfsd4_shutdown_recdir(net);

	return status;
}

/*
 * Legacy tracker ->init. Only permitted for init_net; any other
 * namespace gets -EINVAL.
 */
static int
nfsd4_legacy_tracking_init(struct net *net)
{
	int status;

	/* XXX: The legacy code won't work in a container */
	if (net != &init_net) {
		pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n");
		return -EINVAL;
	}

	status = nfs4_legacy_state_init(net);
	if (status)
		return status;

	status = nfsd4_load_reboot_recovery_data(net);
	if (status)
		goto err;
	pr_info("NFSD: Using legacy client tracking operations.\n");
	return 0;

err:
	nfs4_legacy_state_shutdown(net);
	return status;
}

/*
 * Legacy tracker ->exit: release the reclaim records, the recovery
 * directory reference and the hash table, in that order.
 */
static void
nfsd4_legacy_tracking_exit(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nfs4_release_reclaim(nn);
	nfsd4_shutdown_recdir(net);
	nfs4_legacy_state_shutdown(net);
}

/*
 * Change the NFSv4 recovery directory to recdir.
 *
 * Returns 0 on success, -ENOTDIR if @recdir does not resolve to a
 * directory, or the error returned by kern_path().
 */
int
nfs4_reset_recoverydir(char *recdir)
{
	int status;
	struct path path;

	status = kern_path(recdir, LOOKUP_FOLLOW, &path);
	if (status)
		return status;
	status = -ENOTDIR;
	if (d_is_dir(path.dentry)) {
		/*
		 * NOTE(review): unbounded copy into a PATH_MAX buffer;
		 * presumably safe because kern_path() already accepted
		 * @recdir -- confirm the length limit it enforces.
		 */
		strcpy(user_recovery_dirname, recdir);
		status = 0;
	}
	path_put(&path);
	return status;
}

/* Return the currently configured recovery directory path. */
char *
nfs4_recoverydir(void)
{
	return user_recovery_dirname;
}

/*
 * Legacy tracker ->check: succeed if @clp is already marked stable or
 * a reclaim record exists for its hashed name.
 *
 * Returns 0 if found, -ENOENT if not (or if the lookup key cannot be
 * allocated), or the error from nfs4_make_rec_clidname().
 */
static int
nfsd4_check_legacy_client(struct nfs4_client *clp)
{
	int status;
	char dname[HEXDIR_LEN];
	struct nfs4_client_reclaim *crp;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct xdr_netobj name;

	/* did we already find that this client is stable? */
	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
		return 0;

	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
	if (status) {
		legacy_recdir_name_error(clp, status);
		return status;
	}

	/* look for it in the reclaim hashtable otherwise */
	name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
	if (!name.data) {
		dprintk("%s: failed to allocate memory for name.data!\n",
			__func__);
		goto out_enoent;
	}
	name.len = HEXDIR_LEN;
	crp = nfsd4_find_reclaim_client(name, nn);
	kfree(name.data);
	if (crp) {
		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
		crp->cr_clp = clp;
		return 0;
	}

out_enoent:
	return -ENOENT;
}

/* Callback table for the legacy (recovery directory) tracker. */
static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
	.init		= nfsd4_legacy_tracking_init,
	.exit		= nfsd4_legacy_tracking_exit,
	.create		= nfsd4_create_clid_dir,
	.remove		= nfsd4_remove_clid_dir,
	.check		= nfsd4_check_legacy_client,
	.grace_done	= nfsd4_recdir_purge_old,
	.version	= 1,
	.msglen		= 0,
};

/* Globals */
#define NFSD_PIPE_DIR		"nfsd"
#define NFSD_CLD_PIPE		"cld"

/* per-net-ns structure for holding cld upcall info */
struct cld_net {
	struct rpc_pipe		*cn_pipe;
	spinlock_t		 cn_lock;	/* taken around cn_list / cn_xid use */
	struct list_head	 cn_list;	/* outstanding cld_upcall entries */
	unsigned int		 cn_xid;	/* next transaction id to try */
	bool			 cn_has_legacy;
	struct crypto_shash	*cn_tfm;
};

/*
 * One upcall: linked on cld_net.cn_list; cu_done is completed by the
 * downcall or destroy_msg handlers of the pipe.
 */
struct cld_upcall {
	struct list_head	 cu_list;
	struct cld_net		*cu_net;
	struct completion	 cu_done;
	union {
		struct cld_msg_hdr	 cu_hdr;
		struct cld_msg		 cu_msg;
		struct cld_msg_v2
cu_msg_v2; } cu_u; }; static int __cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) { int ret; struct rpc_pipe_msg msg; struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u); memset(&msg, 0, sizeof(msg)); msg.data = cmsg; msg.len = nn->client_tracking_ops->msglen; ret = rpc_queue_upcall(pipe, &msg); if (ret < 0) { goto out; } wait_for_completion(&cup->cu_done); if (msg.errno < 0) ret = msg.errno; out: return ret; } static int cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) { int ret; /* * -EAGAIN occurs when pipe is closed and reopened while there are * upcalls queued. */ do { ret = __cld_pipe_upcall(pipe, cmsg, nn); } while (ret == -EAGAIN); return ret; } static ssize_t __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, struct nfsd_net *nn) { uint8_t cmd, princhashlen; struct xdr_netobj name, princhash = { .len = 0, .data = NULL }; uint16_t namelen; struct cld_net *cn = nn->cld_net; if (get_user(cmd, &cmsg->cm_cmd)) { dprintk("%s: error when copying cmd from userspace", __func__); return -EFAULT; } if (cmd == Cld_GraceStart) { if (nn->client_tracking_ops->version >= 2) { const struct cld_clntinfo __user *ci; ci = &cmsg->cm_u.cm_clntinfo; if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; name.data = memdup_user(&ci->cc_name.cn_id, namelen); if (IS_ERR(name.data)) return PTR_ERR(name.data); name.len = namelen; get_user(princhashlen, &ci->cc_princhash.cp_len); if (princhashlen > 0) { princhash.data = memdup_user( &ci->cc_princhash.cp_data, princhashlen); if (IS_ERR(princhash.data)) { kfree(name.data); return PTR_ERR(princhash.data); } princhash.len = princhashlen; } else princhash.len = 0; } else { const struct cld_name __user *cnm; cnm = &cmsg->cm_u.cm_name; if (get_user(namelen, &cnm->cn_len)) return -EFAULT; name.data = memdup_user(&cnm->cn_id, namelen); if (IS_ERR(name.data)) return PTR_ERR(name.data); name.len = namelen; } if (name.len > 5 && memcmp(name.data, "hash:", 5) 
== 0) { name.len = name.len - 5; memmove(name.data, name.data + 5, name.len); cn->cn_has_legacy = true; } if (!nfs4_client_to_reclaim(name, princhash, nn)) { kfree(name.data); kfree(princhash.data); return -EFAULT; } return nn->client_tracking_ops->msglen; } return -EFAULT; } static ssize_t cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct cld_upcall *tmp, *cup; struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src; struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src; uint32_t xid; struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info, nfsd_net_id); struct cld_net *cn = nn->cld_net; int16_t status; if (mlen != nn->client_tracking_ops->msglen) { dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, nn->client_tracking_ops->msglen); return -EINVAL; } /* copy just the xid so we can try to find that */ if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) { dprintk("%s: error when copying xid from userspace", __func__); return -EFAULT; } /* * copy the status so we know whether to remove the upcall from the * list (for -EINPROGRESS, we just want to make sure the xid is * valid, not remove the upcall from the list) */ if (get_user(status, &hdr->cm_status)) { dprintk("%s: error when copying status from userspace", __func__); return -EFAULT; } /* walk the list and find corresponding xid */ cup = NULL; spin_lock(&cn->cn_lock); list_for_each_entry(tmp, &cn->cn_list, cu_list) { if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) { cup = tmp; if (status != -EINPROGRESS) list_del_init(&cup->cu_list); break; } } spin_unlock(&cn->cn_lock); /* couldn't find upcall? 
*/ if (!cup) { dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid); return -EINVAL; } if (status == -EINPROGRESS) return __cld_pipe_inprogress_downcall(cmsg, nn); if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0) return -EFAULT; complete(&cup->cu_done); return mlen; } static void cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) { struct cld_msg *cmsg = msg->data; struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u.cu_msg); /* errno >= 0 means we got a downcall */ if (msg->errno >= 0) return; complete(&cup->cu_done); } static const struct rpc_pipe_ops cld_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = cld_pipe_downcall, .destroy_msg = cld_pipe_destroy_msg, }; static struct dentry * nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe) { struct dentry *dir, *dentry; dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR); if (dir == NULL) return ERR_PTR(-ENOENT); dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe); dput(dir); return dentry; } static void nfsd4_cld_unregister_sb(struct rpc_pipe *pipe) { if (pipe->dentry) rpc_unlink(pipe->dentry); } static struct dentry * nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe) { struct super_block *sb; struct dentry *dentry; sb = rpc_get_sb_net(net); if (!sb) return NULL; dentry = nfsd4_cld_register_sb(sb, pipe); rpc_put_sb_net(net); return dentry; } static void nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) { struct super_block *sb; sb = rpc_get_sb_net(net); if (sb) { nfsd4_cld_unregister_sb(pipe); rpc_put_sb_net(net); } } /* Initialize rpc_pipefs pipe for communication with client tracking daemon */ static int __nfsd4_init_cld_pipe(struct net *net) { int ret; struct dentry *dentry; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct cld_net *cn; if (nn->cld_net) return 0; cn = kzalloc(sizeof(*cn), GFP_KERNEL); if (!cn) { ret = -ENOMEM; goto err; } cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if 
(IS_ERR(cn->cn_pipe)) {
		ret = PTR_ERR(cn->cn_pipe);
		goto err;
	}
	spin_lock_init(&cn->cn_lock);
	INIT_LIST_HEAD(&cn->cn_list);

	/*
	 * NOTE(review): nfsd4_cld_register_net() returns NULL when no
	 * superblock is available; that passes the IS_ERR() test below
	 * and leaves cn_pipe->dentry NULL -- confirm this is intended.
	 */
	dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
	if (IS_ERR(dentry)) {
		ret = PTR_ERR(dentry);
		goto err_destroy_data;
	}

	cn->cn_pipe->dentry = dentry;
	cn->cn_has_legacy = false;
	nn->cld_net = cn;
	return 0;

err_destroy_data:
	rpc_destroy_pipe_data(cn->cn_pipe);
err:
	/* cn may be NULL here; kfree(NULL) is a no-op */
	kfree(cn);
	printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
			ret);
	return ret;
}

/* Set up the cld pipe and announce the "old nfsdcld" tracking ops. */
static int
nfsd4_init_cld_pipe(struct net *net)
{
	int status;

	status = __nfsd4_init_cld_pipe(net);
	if (!status)
		pr_info("NFSD: Using old nfsdcld client tracking operations.\n");
	return status;
}

/*
 * Tear down what __nfsd4_init_cld_pipe() created, plus cn_tfm when one
 * has been attached. Dereferences nn->cld_net unconditionally.
 */
static void
nfsd4_remove_cld_pipe(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct cld_net *cn = nn->cld_net;

	nfsd4_cld_unregister_net(net, cn->cn_pipe);
	rpc_destroy_pipe_data(cn->cn_pipe);
	if (cn->cn_tfm)
		crypto_free_shash(cn->cn_tfm);
	kfree(nn->cld_net);
	nn->cld_net = NULL;
}

/*
 * Allocate a zeroed upcall, give it an xid that no upcall currently on
 * cn_list is using, and add it to cn_list.
 *
 * Returns the new upcall or NULL on allocation failure.
 */
static struct cld_upcall *
alloc_cld_upcall(struct nfsd_net *nn)
{
	struct cld_upcall *new, *tmp;
	struct cld_net *cn = nn->cld_net;

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return new;

	/* FIXME: hard cap on number in flight? */
restart_search:
	spin_lock(&cn->cn_lock);
	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
		/* xid in use: try the next one and rescan from the start */
		if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
			cn->cn_xid++;
			spin_unlock(&cn->cn_lock);
			goto restart_search;
		}
	}
	init_completion(&new->cu_done);
	new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
	put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
	new->cu_net = cn;
	list_add(&new->cu_list, &cn->cn_list);
	spin_unlock(&cn->cn_lock);

	dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);

	return new;
}

/* Unlink @victim from its cld_net list under cn_lock and free it. */
static void
free_cld_upcall(struct cld_upcall *victim)
{
	struct cld_net *cn = victim->cu_net;

	spin_lock(&cn->cn_lock);
	list_del(&victim->cu_list);
	spin_unlock(&cn->cn_lock);
	kfree(victim);
}

/*
 * Ask daemon to create a new record
 *
 * Sends a Cld_Create upcall carrying the client name. The client is
 * marked NFSD4_CLIENT_STABLE whenever the upcall itself succeeds; the
 * daemon's cm_status is only reported via printk when non-zero.
 */
static void
nfsd4_cld_create(struct nfs4_client *clp)
{
	int ret;
	struct cld_upcall *cup;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct cld_net *cn = nn->cld_net;

	/* Don't upcall if it's already stored */
	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
		return;

	cup = alloc_cld_upcall(nn);
	if (!cup) {
		ret = -ENOMEM;
		goto out_err;
	}

	cup->cu_u.cu_msg.cm_cmd = Cld_Create;
	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
			clp->cl_name.len);

	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
	if (!ret) {
		ret = cup->cu_u.cu_msg.cm_status;
		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
	}

	free_cld_upcall(cup);
out_err:
	if (ret)
		printk(KERN_ERR "NFSD: Unable to create client "
				"record on stable storage: %d\n", ret);
}

/*
 * Ask daemon to create a new record
 *
 * Version 2 variant: besides the client name the message also carries
 * a digest (computed with cn->cn_tfm) of the client's principal, when
 * the client has one.
 */
static void
nfsd4_cld_create_v2(struct nfs4_client *clp)
{
	int ret;
	struct cld_upcall *cup;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct cld_net *cn = nn->cld_net;
	struct cld_msg_v2 *cmsg;
	struct crypto_shash *tfm = cn->cn_tfm;
	struct xdr_netobj cksum;
	char *principal = NULL;

	/* Don't upcall if it's already stored */
	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return; cup = alloc_cld_upcall(nn); if (!cup) { ret = -ENOMEM; goto out_err; } cmsg = &cup->cu_u.cu_msg_v2; cmsg->cm_cmd = Cld_Create; cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len; memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data, clp->cl_name.len); if (clp->cl_cred.cr_raw_principal) principal = clp->cl_cred.cr_raw_principal; else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal) { cksum.len = crypto_shash_digestsize(tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) { ret = -ENOMEM; goto out; } ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal), cksum.data); if (ret) { kfree(cksum.data); goto out; } cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len; memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data, cksum.data, cksum.len); kfree(cksum.data); } else cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn); if (!ret) { ret = cmsg->cm_status; set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } out: free_cld_upcall(cup); out_err: if (ret) pr_err("NFSD: Unable to create client record on stable storage: %d\n", ret); } /* Ask daemon to create a new record */ static void nfsd4_cld_remove(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if it's already removed */ if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; cup = alloc_cld_upcall(nn); if (!cup) { ret = -ENOMEM; goto out_err; } cup->cu_u.cu_msg.cm_cmd = Cld_Remove; cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, clp->cl_name.len); ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } free_cld_upcall(cup); out_err: if (ret) printk(KERN_ERR "NFSD: Unable to remove client " "record from 
stable storage: %d\n", ret); } /* * For older nfsdcld's that do not allow us to "slurp" the clients * from the tracking database during startup. * * Check for presence of a record, and update its timestamp */ static int nfsd4_cld_check_v0(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if one was already stored during this grace pd */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; cup = alloc_cld_upcall(nn); if (!cup) { printk(KERN_ERR "NFSD: Unable to check client record on " "stable storage: %d\n", -ENOMEM); return -ENOMEM; } cup->cu_u.cu_msg.cm_cmd = Cld_Check; cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, clp->cl_name.len); ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } free_cld_upcall(cup); return ret; } /* * For newer nfsdcld's that allow us to "slurp" the clients * from the tracking database during startup. * * Check for presence of a record in the reclaim_str_hashtbl */ static int nfsd4_cld_check(struct nfs4_client *clp) { struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; int status; char dname[HEXDIR_LEN]; struct xdr_netobj name; /* did we already find that this client is stable? 
*/ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; /* look for it in the reclaim hashtable otherwise */ crp = nfsd4_find_reclaim_client(clp->cl_name, nn); if (crp) goto found; if (cn->cn_has_legacy) { status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) return -ENOENT; name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); if (!name.data) { dprintk("%s: failed to allocate memory for name.data!\n", __func__); return -ENOENT; } name.len = HEXDIR_LEN; crp = nfsd4_find_reclaim_client(name, nn); kfree(name.data); if (crp) goto found; } return -ENOENT; found: crp->cr_clp = clp; return 0; } static int nfsd4_cld_check_v2(struct nfs4_client *clp) { struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; int status; char dname[HEXDIR_LEN]; struct xdr_netobj name; struct crypto_shash *tfm = cn->cn_tfm; struct xdr_netobj cksum; char *principal = NULL; /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; /* look for it in the reclaim hashtable otherwise */ crp = nfsd4_find_reclaim_client(clp->cl_name, nn); if (crp) goto found; if (cn->cn_has_legacy) { status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) return -ENOENT; name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); if (!name.data) { dprintk("%s: failed to allocate memory for name.data\n", __func__); return -ENOENT; } name.len = HEXDIR_LEN; crp = nfsd4_find_reclaim_client(name, nn); kfree(name.data); if (crp) goto found; } return -ENOENT; found: if (crp->cr_princhash.len) { if (clp->cl_cred.cr_raw_principal) principal = clp->cl_cred.cr_raw_principal; else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal == NULL) return -ENOENT; cksum.len = crypto_shash_digestsize(tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) return -ENOENT; status = crypto_shash_tfm_digest(tfm, principal, strlen(principal), 
cksum.data); if (status) { kfree(cksum.data); return -ENOENT; } if (memcmp(crp->cr_princhash.data, cksum.data, crp->cr_princhash.len)) { kfree(cksum.data); return -ENOENT; } kfree(cksum.data); } crp->cr_clp = clp; return 0; } static int nfsd4_cld_grace_start(struct nfsd_net *nn) { int ret; struct cld_upcall *cup; struct cld_net *cn = nn->cld_net; cup = alloc_cld_upcall(nn); if (!cup) { ret = -ENOMEM; goto out_err; } cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) ret = cup->cu_u.cu_msg.cm_status; free_cld_upcall(cup); out_err: if (ret) dprintk("%s: Unable to get clients from userspace: %d\n", __func__, ret); return ret; } /* For older nfsdcld's that need cm_gracetime */ static void nfsd4_cld_grace_done_v0(struct nfsd_net *nn) { int ret; struct cld_upcall *cup; struct cld_net *cn = nn->cld_net; cup = alloc_cld_upcall(nn); if (!cup) { ret = -ENOMEM; goto out_err; } cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) ret = cup->cu_u.cu_msg.cm_status; free_cld_upcall(cup); out_err: if (ret) printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); } /* * For newer nfsdcld's that do not need cm_gracetime. We also need to call * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl. 
*/ static void nfsd4_cld_grace_done(struct nfsd_net *nn) { int ret; struct cld_upcall *cup; struct cld_net *cn = nn->cld_net; cup = alloc_cld_upcall(nn); if (!cup) { ret = -ENOMEM; goto out_err; } cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) ret = cup->cu_u.cu_msg.cm_status; free_cld_upcall(cup); out_err: nfs4_release_reclaim(nn); if (ret) printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); } static int nfs4_cld_state_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int i; nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->reclaim_str_hashtbl) return -ENOMEM; for (i = 0; i < CLIENT_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); nn->reclaim_str_hashtbl_size = 0; nn->track_reclaim_completes = true; atomic_set(&nn->nr_reclaim_complete, 0); return 0; } static void nfs4_cld_state_shutdown(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nn->track_reclaim_completes = false; kfree(nn->reclaim_str_hashtbl); } static bool cld_running(struct nfsd_net *nn) { struct cld_net *cn = nn->cld_net; struct rpc_pipe *pipe = cn->cn_pipe; return pipe->nreaders || pipe->nwriters; } static int nfsd4_cld_get_version(struct nfsd_net *nn) { int ret = 0; struct cld_upcall *cup; struct cld_net *cn = nn->cld_net; uint8_t version; cup = alloc_cld_upcall(nn); if (!cup) { ret = -ENOMEM; goto out_err; } cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; if (ret) goto out_free; version = cup->cu_u.cu_msg.cm_u.cm_version; dprintk("%s: userspace returned version %u\n", __func__, version); if (version < 1) version = 1; else if (version > CLD_UPCALL_VERSION) version = CLD_UPCALL_VERSION; switch (version) { case 1: nn->client_tracking_ops = &nfsd4_cld_tracking_ops; break; case 2: nn->client_tracking_ops = 
&nfsd4_cld_tracking_ops_v2; break; default: break; } } out_free: free_cld_upcall(cup); out_err: if (ret) dprintk("%s: Unable to get version from userspace: %d\n", __func__, ret); return ret; } static int nfsd4_cld_tracking_init(struct net *net) { int status; struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool running; int retries = 10; struct crypto_shash *tfm; status = nfs4_cld_state_init(net); if (status) return status; status = __nfsd4_init_cld_pipe(net); if (status) goto err_shutdown; /* * rpc pipe upcalls take 30 seconds to time out, so we don't want to * queue an upcall unless we know that nfsdcld is running (because we * want this to fail fast so that nfsd4_client_tracking_init() can try * the next client tracking method). nfsdcld should already be running * before nfsd is started, so the wait here is for nfsdcld to open the * pipefs file we just created. */ while (!(running = cld_running(nn)) && retries--) msleep(100); if (!running) { status = -ETIMEDOUT; goto err_remove; } tfm = crypto_alloc_shash("sha256", 0, 0); if (IS_ERR(tfm)) { status = PTR_ERR(tfm); goto err_remove; } nn->cld_net->cn_tfm = tfm; status = nfsd4_cld_get_version(nn); if (status == -EOPNOTSUPP) pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n"); status = nfsd4_cld_grace_start(nn); if (status) { if (status == -EOPNOTSUPP) pr_warn("NFSD: nfsdcld GraceStart upcall failed. 
Please upgrade nfsdcld.\n"); nfs4_release_reclaim(nn); goto err_remove; } else pr_info("NFSD: Using nfsdcld client tracking operations.\n"); return 0; err_remove: nfsd4_remove_cld_pipe(net); err_shutdown: nfs4_cld_state_shutdown(net); return status; } static void nfsd4_cld_tracking_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs4_release_reclaim(nn); nfsd4_remove_cld_pipe(net); nfs4_cld_state_shutdown(net); } /* For older nfsdcld's */ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = { .init = nfsd4_init_cld_pipe, .exit = nfsd4_remove_cld_pipe, .create = nfsd4_cld_create, .remove = nfsd4_cld_remove, .check = nfsd4_cld_check_v0, .grace_done = nfsd4_cld_grace_done_v0, .version = 1, .msglen = sizeof(struct cld_msg), }; /* For newer nfsdcld's */ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { .init = nfsd4_cld_tracking_init, .exit = nfsd4_cld_tracking_exit, .create = nfsd4_cld_create, .remove = nfsd4_cld_remove, .check = nfsd4_cld_check, .grace_done = nfsd4_cld_grace_done, .version = 1, .msglen = sizeof(struct cld_msg), }; /* v2 create/check ops include the principal, if available */ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = { .init = nfsd4_cld_tracking_init, .exit = nfsd4_cld_tracking_exit, .create = nfsd4_cld_create_v2, .remove = nfsd4_cld_remove, .check = nfsd4_cld_check_v2, .grace_done = nfsd4_cld_grace_done, .version = 2, .msglen = sizeof(struct cld_msg_v2), }; /* upcall via usermodehelper */ static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack"; module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog), S_IRUGO|S_IWUSR); MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program"); static bool cltrack_legacy_disable; module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(cltrack_legacy_disable, "Disable legacy recoverydir conversion. 
Default: false"); #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" #define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION=" #define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START=" static char * nfsd4_cltrack_legacy_topdir(void) { int copied; size_t len; char *result; if (cltrack_legacy_disable) return NULL; len = strlen(LEGACY_TOPDIR_ENV_PREFIX) + strlen(nfs4_recoverydir()) + 1; result = kmalloc(len, GFP_KERNEL); if (!result) return result; copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s", nfs4_recoverydir()); if (copied >= len) { /* just return nothing if output was truncated */ kfree(result); return NULL; } return result; } static char * nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) { int copied; size_t len; char *result; if (cltrack_legacy_disable) return NULL; /* +1 is for '/' between "topdir" and "recdir" */ len = strlen(LEGACY_RECDIR_ENV_PREFIX) + strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN; result = kmalloc(len, GFP_KERNEL); if (!result) return result; copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/", nfs4_recoverydir()); if (copied > (len - HEXDIR_LEN)) { /* just return nothing if output will be truncated */ kfree(result); return NULL; } copied = nfs4_make_rec_clidname(result + copied, name); if (copied) { kfree(result); return NULL; } return result; } static char * nfsd4_cltrack_client_has_session(struct nfs4_client *clp) { int copied; size_t len; char *result; /* prefix + Y/N character + terminating NULL */ len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1; result = kmalloc(len, GFP_KERNEL); if (!result) return result; copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c", clp->cl_minorversion ? 
'Y' : 'N'); if (copied >= len) { /* just return nothing if output was truncated */ kfree(result); return NULL; } return result; } static char * nfsd4_cltrack_grace_start(time64_t grace_start) { int copied; size_t len; char *result; /* prefix + max width of int64_t string + terminating NULL */ len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1; result = kmalloc(len, GFP_KERNEL); if (!result) return result; copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld", grace_start); if (copied >= len) { /* just return nothing if output was truncated */ kfree(result); return NULL; } return result; } static int nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1) { char *envp[3]; char *argv[4]; int ret; if (unlikely(!cltrack_prog[0])) { dprintk("%s: cltrack_prog is disabled\n", __func__); return -EACCES; } dprintk("%s: cmd: %s\n", __func__, cmd); dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)"); dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)"); envp[0] = env0; envp[1] = env1; envp[2] = NULL; argv[0] = (char *)cltrack_prog; argv[1] = cmd; argv[2] = arg; argv[3] = NULL; ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); /* * Disable the upcall mechanism if we're getting an ENOENT or EACCES * error. The admin can re-enable it on the fly by using sysfs * once the problem has been fixed. */ if (ret == -ENOENT || ret == -EACCES) { dprintk("NFSD: %s was not found or isn't executable (%d). 
" "Setting cltrack_prog to blank string!", cltrack_prog, ret); cltrack_prog[0] = '\0'; } dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret); return ret; } static char * bin_to_hex_dup(const unsigned char *src, int srclen) { char *buf; /* +1 for terminating NULL */ buf = kzalloc((srclen * 2) + 1, GFP_KERNEL); if (!buf) return buf; bin2hex(buf, src, srclen); return buf; } static int nfsd4_umh_cltrack_init(struct net *net) { int ret; struct nfsd_net *nn = net_generic(net, nfsd_net_id); char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time); /* XXX: The usermode helper s not working in container yet. */ if (net != &init_net) { pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n"); kfree(grace_start); return -EINVAL; } ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); if (!ret) pr_info("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } static void nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) { wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK, TASK_UNINTERRUPTIBLE); } static void nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) { smp_mb__before_atomic(); clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); smp_mb__after_atomic(); wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); } static void nfsd4_umh_cltrack_create(struct nfs4_client *clp) { char *hexid, *has_session, *grace_start; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); /* * With v4.0 clients, there's little difference in outcome between a * create and check operation, and we can end up calling into this * function multiple times per client (once for each openowner). So, * for v4.0 clients skip upcalling once the client has been recorded * on stable storage. * * For v4.1+ clients, the outcome of the two operations is different, * so we must ensure that we upcall for the create operation. 
v4.1+ * clients call this on RECLAIM_COMPLETE though, so we should only end * up doing a single create upcall per client. */ if (clp->cl_minorversion == 0 && test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); if (!hexid) { dprintk("%s: can't allocate memory for upcall!\n", __func__); return; } has_session = nfsd4_cltrack_client_has_session(clp); grace_start = nfsd4_cltrack_grace_start(nn->boot_time); nfsd4_cltrack_upcall_lock(clp); if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start)) set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); nfsd4_cltrack_upcall_unlock(clp); kfree(has_session); kfree(grace_start); kfree(hexid); } static void nfsd4_umh_cltrack_remove(struct nfs4_client *clp) { char *hexid; if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); if (!hexid) { dprintk("%s: can't allocate memory for upcall!\n", __func__); return; } nfsd4_cltrack_upcall_lock(clp); if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) && nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0) clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); nfsd4_cltrack_upcall_unlock(clp); kfree(hexid); } static int nfsd4_umh_cltrack_check(struct nfs4_client *clp) { int ret; char *hexid, *has_session, *legacy; if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); if (!hexid) { dprintk("%s: can't allocate memory for upcall!\n", __func__); return -ENOMEM; } has_session = nfsd4_cltrack_client_has_session(clp); legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); nfsd4_cltrack_upcall_lock(clp); if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) { ret = 0; } else { ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy); if (ret == 0) set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } nfsd4_cltrack_upcall_unlock(clp); kfree(has_session); kfree(legacy); kfree(hexid); return ret; } 
/*
 * ->grace_done for the usermode-helper tracker: run the helper's
 * "gracedone" command with nn->boot_time as its argument and, unless
 * legacy conversion is disabled, the legacy top directory in its
 * environment. The helper's return value is ignored.
 */
static void
nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
{
	char *legacy;
	char timestr[22]; /* FIXME: better way to determine max size? */

	sprintf(timestr, "%lld", nn->boot_time);
	legacy = nfsd4_cltrack_legacy_topdir();
	nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
	kfree(legacy);
}

/* Ops table for client tracking through the usermode helper. */
static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
	.init		= nfsd4_umh_cltrack_init,
	.exit		= NULL,
	.create		= nfsd4_umh_cltrack_create,
	.remove		= nfsd4_umh_cltrack_remove,
	.check		= nfsd4_umh_cltrack_check,
	.grace_done	= nfsd4_umh_cltrack_grace_done,
	.version	= 1,
	.msglen		= 0,
};

/*
 * Pick and initialise a client tracking method for @net.
 *
 * If nn->client_tracking_ops is already set, only its ->init is run.
 * Otherwise the methods are tried in this order, stopping at the first
 * whose ->init returns 0:
 *   1. nfsdcld (newer ops);
 *   2. nfsdcld v0 ops, skipped when step 1 failed with -ETIMEDOUT;
 *   3. the usermode helper;
 *   4. the legacy ops; if the recovery path resolves to something
 *      that is not a directory, this fails with -EINVAL without
 *      calling ->init.
 *
 * On failure a warning is logged and nn->client_tracking_ops is reset
 * to NULL. Returns 0 or a negative errno.
 */
int
nfsd4_client_tracking_init(struct net *net)
{
	int status;
	struct path path;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	/* just run the init if the method is already decided */
	if (nn->client_tracking_ops)
		goto do_init;

	/* First, try to use nfsdcld */
	nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
	status = nn->client_tracking_ops->init(net);
	if (!status)
		return status;
	if (status != -ETIMEDOUT) {
		nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
		status = nn->client_tracking_ops->init(net);
		if (!status)
			return status;
	}

	/*
	 * Next, try the UMH upcall.
	 */
	nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
	status = nn->client_tracking_ops->init(net);
	if (!status)
		return status;

	/*
	 * Finally, See if the recoverydir exists and is a directory.
	 * If it is, then use the legacy ops.
	 */
	nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
	status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
	if (!status) {
		/* "status" briefly holds a boolean here, not an errno */
		status = d_is_dir(path.dentry);
		path_put(&path);
		if (!status) {
			status = -EINVAL;
			goto out;
		}
	}

do_init:
	status = nn->client_tracking_ops->init(net);
out:
	if (status) {
		printk(KERN_WARNING "NFSD: Unable to initialize client "
				    "recovery tracking! (%d)\n", status);
		nn->client_tracking_ops = NULL;
	}
	return status;
}

/*
 * Shut down the active tracking method, if any: call its ->exit when
 * one is provided, then clear nn->client_tracking_ops.
 */
void
nfsd4_client_tracking_exit(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (nn->client_tracking_ops) {
		if (nn->client_tracking_ops->exit)
			nn->client_tracking_ops->exit(net);
		nn->client_tracking_ops = NULL;
	}
}

/* Forward to the active method's ->create; no-op without a method. */
void
nfsd4_client_record_create(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (nn->client_tracking_ops)
		nn->client_tracking_ops->create(clp);
}

/* Forward to the active method's ->remove; no-op without a method. */
void
nfsd4_client_record_remove(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (nn->client_tracking_ops)
		nn->client_tracking_ops->remove(clp);
}

/*
 * Forward to the active method's ->check and return its result, or
 * -EOPNOTSUPP when no tracking method is active.
 */
int
nfsd4_client_record_check(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (nn->client_tracking_ops)
		return nn->client_tracking_ops->check(clp);

	return -EOPNOTSUPP;
}

/* Forward to the active method's ->grace_done; no-op without a method. */
void
nfsd4_record_grace_done(struct nfsd_net *nn)
{
	if (nn->client_tracking_ops)
		nn->client_tracking_ops->grace_done(nn);
}

/*
 * rpc_pipefs notifier callback. @ptr is the pipefs super_block whose
 * s_fs_info is the owning net namespace.
 *
 * On RPC_PIPEFS_MOUNT the cld pipe is registered on the new superblock
 * and its dentry recorded; on RPC_PIPEFS_UMOUNT the pipe is
 * unregistered if it has a dentry. Any other event yields -ENOTSUPP.
 * Nothing is done (and 0 is returned) if the module reference cannot
 * be taken or this net has no cld state.
 */
static int
rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct super_block *sb = ptr;
	struct net *net = sb->s_fs_info;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct cld_net *cn = nn->cld_net;
	struct dentry *dentry;
	int ret = 0;

	/* hold a module reference for the duration of the callback */
	if (!try_module_get(THIS_MODULE))
		return 0;

	if (!cn) {
		module_put(THIS_MODULE);
		return 0;
	}

	switch (event) {
	case RPC_PIPEFS_MOUNT:
		dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
		if (IS_ERR(dentry)) {
			ret = PTR_ERR(dentry);
			break;
		}
		cn->cn_pipe->dentry = dentry;
		break;
	case RPC_PIPEFS_UMOUNT:
		if (cn->cn_pipe->dentry)
			nfsd4_cld_unregister_sb(cn->cn_pipe);
		break;
	default:
		ret = -ENOTSUPP;
		break;
	}
	module_put(THIS_MODULE);
	return ret;
}

/* Notifier block hooking rpc_pipefs_event() into rpc_pipefs events. */
static struct notifier_block nfsd4_cld_block = {
	.notifier_call = rpc_pipefs_event,
};

/*
 * Register the rpc_pipefs notifier. Warns if nfsd_net_id is still 0,
 * since rpc_pipefs_event() uses it to look up per-net state.
 */
int
register_cld_notifier(void)
{
	WARN_ON(!nfsd_net_id);
	return rpc_pipefs_notifier_register(&nfsd4_cld_block);
}

/* Unregister the rpc_pipefs notifier. */
void
unregister_cld_notifier(void)
{
	rpc_pipefs_notifier_unregister(&nfsd4_cld_block);
}
|
10 2 1 1 2 3 10 10 10 10 10 10 8 2 8 10 5 1 1 1 1 1 1 7 1 7 19 19 19 10 10 1 1 1 10 10 11 11 5 5 9 8 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 |
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

/*
 * Per-bio bookkeeping for the bounce-copy path, stored in
 * bio->bi_private by bio_copy_user_iov() and consumed by
 * bio_uncopy_user().
 */
struct bio_map_data {
	bool is_our_pages : 1;		/* set when no rq_map_data was supplied */
	bool is_null_mapped : 1;	/* copied from map_data->null_mapped */
	struct iov_iter iter;		/* private copy of the caller's iterator */
	struct iovec iov[];		/* private copy of the iovecs, if any */
};

/*
 * Allocate a bio_map_data and snapshot @data into it. For iovec-backed
 * iterators the iovec array is copied too and the snapshot is pointed
 * at the copy. Returns NULL if @data has more than UIO_MAXIOV segments
 * or the allocation fails.
 */
static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	bmd->iter = *data;
	if (iter_is_iovec(data)) {
		memcpy(bmd->iov, iter_iov(data),
		       sizeof(struct iovec) * data->nr_segs);
		bmd->iter.__iov = bmd->iov;
	}
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		/* source exhausted: done, even if this segment is short */
		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 *
 * @iter is taken by value, so the caller's iterator is not advanced.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		/* destination full: done, even if this segment is short */
		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 *	bio_uncopy_user	-	finish previously mapped bio
 *	@bio: bio being terminated
 *
 *	Free pages allocated from bio_copy_user_iov() and write back data
 *	to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bmd->is_null_mapped) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	return ret;
}

/*
 * Build a bounce bio for @iter and append it to @rq.
 *
 * Pages come from @map_data when it is supplied, otherwise they are
 * allocated here. Data is copied in from @iter for writes (unless the
 * mapping is null-mapped) or when map_data->from_user is set;
 * otherwise freshly allocated pages are zero-filled and @iter is
 * advanced past the mapped bytes.
 *
 * If a page cannot be added to the bio the loop stops early and the
 * bio covers only what fitted. Returns 0 on success or a negative
 * errno; on failure everything allocated here is released.
 */
static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return -ENOMEM;

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = !map_data;
	bmd->is_null_mapped = (map_data && map_data->null_mapped);

	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));

	ret = -ENOMEM;
	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		goto out_bmd;
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));

	if (map_data) {
		/* from here nr_pages means "pages per map_data entry" */
		nr_pages = 1U << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			/* ran out of caller-supplied pages */
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				goto cleanup;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(GFP_NOIO | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				goto cleanup;
			}
		}

		if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;

	ret = blk_rq_append_bio(rq, bio);
	if (ret)
		goto cleanup;
	return 0;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
out_bmd:
	kfree(bmd);
	return ret;
}

/*
 * Release a bio obtained from blk_rq_map_bio_alloc(): bio_put() for
 * bios flagged REQ_ALLOC_CACHE, bio_uninit() + kfree() for the rest.
 */
static void blk_mq_map_bio_put(struct bio *bio)
{
	if (bio->bi_opf & REQ_ALLOC_CACHE) {
		bio_put(bio);
	} else {
		bio_uninit(bio);
		kfree(bio);
	}
}

/*
 * Allocate a bio with room for @nr_vecs vectors for @rq. Requests
 * flagged REQ_ALLOC_CACHE that need at most BIO_INLINE_VECS vectors
 * allocate from fs_bio_set; everything else uses bio_kmalloc() and is
 * initialised with the request's op. Returns NULL on failure.
 */
static struct bio *blk_rq_map_bio_alloc(struct request *rq,
		unsigned int nr_vecs, gfp_t gfp_mask)
{
	struct bio *bio;

	if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
		bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags, gfp_mask,
					&fs_bio_set);
		if (!bio)
			return NULL;
	} else {
		bio = bio_kmalloc(nr_vecs, gfp_mask);
		if (!bio)
			return NULL;
		bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
	}
	return bio;
}

/*
 * Map the pages behind @iter directly into a bio and append it to @rq.
 *
 * Pages are extracted from @iter in batches and added to the bio one
 * at a time. A batch whose starting offset violates the queue's DMA
 * alignment mask is not added at all. When something does not fit,
 * the unmapped pages are released, @iter is reverted by the bytes
 * left over, and the bio built so far is appended.
 *
 * Returns 0 on success, -EINVAL for an empty @iter, -ENOMEM if no bio
 * could be allocated, or the error from page extraction or
 * blk_rq_append_bio().
 */
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	iov_iter_extraction_t extraction_flags = 0;
	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
	struct bio *bio;
	int ret;
	int j;

	if (!iov_iter_count(iter))
		return -EINVAL;

	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
	if (bio == NULL)
		return -ENOMEM;

	if (blk_queue_pci_p2pdma(rq->q))
		extraction_flags |= ITER_ALLOW_P2PDMA;
	if (iov_iter_extract_will_pin(iter))
		bio_set_flag(bio, BIO_PAGE_PINNED);

	while (iov_iter_count(iter)) {
		struct page *stack_pages[UIO_FASTIOV];
		struct page **pages = stack_pages;
		ssize_t bytes;
		size_t offs;
		int npages;

		/*
		 * Too many pages for the on-stack array: pass NULL so the
		 * extractor supplies the array (freed with kvfree() below).
		 */
		if (nr_vecs > ARRAY_SIZE(stack_pages))
			pages = NULL;

		bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
					       nr_vecs, extraction_flags, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ? bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(rq->q)))
			j = 0;
		else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
						     max_sectors, &same_page))
					break;

				/* merged into the previous vec: drop extra ref */
				if (same_page)
					bio_release_page(bio, page);
				bytes -= n;
				offs = 0;
			}
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			bio_release_page(bio, pages[j++]);
		if (pages != stack_pages)
			kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes) {
			iov_iter_revert(iter, bytes);
			break;
		}
	}

	ret = blk_rq_append_bio(rq, bio);
	if (ret)
		goto out_unmap;
	return 0;

 out_unmap:
	bio_release_pages(bio, false);
	blk_mq_map_bio_put(bio);
	return ret;
}

/*
 * For a non-write bio whose bi_private is set (bio_map_kern() stores
 * the vmalloc address there), invalidate the kernel vmap range
 * covering all of the bio's segments. Compiles to nothing unless
 * ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE is defined.
 */
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

/* Completion handler for bios built by bio_map_kern(). */
static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}

/**
 *	bio_map_kern	-	map kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to map
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio allocation
 *
 *	Map the kernel address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		/* remembered for bio_invalidate_vmalloc_pages() */
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		/* len is unsigned, so this is effectively len == 0 */
		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_uninit(bio);
			kfree(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

/* Completion handler for copied kernel bios: free the pages and the bio. */
static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}

/*
 * Completion handler for copied kernel reads: copy every segment of
 * the bio, in order, into the buffer at bio->bi_private, then free the
 * pages and the bio via bio_copy_kern_endio().
 */
static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy_from_bvec(p, bvec);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 *	bio_copy_kern	-	copy kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to copy
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio and page allocation
 *	@reading: data direction is READ
 *
 *	copy the kernel address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
*/
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);

	/* Bounce the buffer page by page; writes are copied in right away. */
	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
			/*
			 * The page was not attached to the bio, so neither
			 * bio_free_pages() nor the end_io handlers will ever
			 * see it: release it here instead of leaking it.
			 */
			__free_page(page);
			break;
		}

		len -= bytes;
		p += bytes;
	}

	/*
	 * Reads need the original buffer at completion time so the bounce
	 * pages can be copied back; writes only need the pages freed.
	 */
	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
	return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	/* count the bvecs so the request's segment accounting can be updated */
	bio_for_each_bvec(bv, bio, iter)
		nr_segs++;

	if (!rq->bio) {
		/* first bio of the request */
		blk_rq_bio_prep(rq, bio, nr_segs);
	} else {
		/* later bios must be back-mergeable or the append is refused */
		if (!ll_back_merge_fn(rq, bio, nr_segs))
			return -EINVAL;
		rq->biotail->bi_next = bio;
		rq->biotail = bio;
		rq->__data_len += (bio)->bi_iter.bi_size;
		bio_crypt_free_ctx(bio);
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

/*
 * Prepare bio for passthrough IO given ITER_BVEC iter.
 *
 * Returns 0 on success, -EREMOTEIO when a gap between segments violates
 * the queue limits (the caller falls back to copying), -ENOMEM when no
 * bio could be allocated and -EINVAL for any other limit violation.
 * On the error paths the bio is put although rq->bio was already set by
 * blk_rq_bio_prep(); the caller visible here, blk_rq_map_user_iov(),
 * resets rq->bio to NULL afterwards.
 */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
	struct request_queue *q = rq->q;
	size_t nr_iter = iov_iter_count(iter);
	size_t nr_segs = iter->nr_segs;
	struct bio_vec *bvecs, *bvprvp = NULL;
	const struct queue_limits *lim = &q->limits;
	unsigned int nsegs = 0, bytes = 0;
	struct bio *bio;
	size_t i;

	if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
		return -EINVAL;
	if (nr_segs > queue_max_segments(q))
		return -EINVAL;

	/* no iovecs to alloc, as we already have a BVEC iterator */
	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
	if (bio == NULL)
		return -ENOMEM;

	bio_iov_bvec_set(bio, (struct iov_iter *)iter);
	blk_rq_bio_prep(rq, bio, nr_segs);

	/* loop to perform a bunch of sanity checks */
	bvecs = (struct bio_vec *)iter->bvec;
	for (i = 0; i < nr_segs; i++) {
		struct bio_vec *bv = &bvecs[i];

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, fallback to copy.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
			blk_mq_map_bio_put(bio);
			return -EREMOTEIO;
		}
		/* check full condition */
		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
			goto put_bio;
		if (bytes + bv->bv_len > nr_iter)
			goto put_bio;
		/* a segment may not cross a page boundary */
		if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
			goto put_bio;

		nsegs++;
		bytes += bv->bv_len;
		bvprvp = bv;
	}
	return 0;
put_bio:
	blk_mq_map_bio_put(bio);
	return -EINVAL;
}

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q:		request queue where request should be inserted
 * @rq:		request to map data to
 * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 * @iter:	iovec iterator
 * @gfp_mask:	memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Returns 0 on success or a negative errno; on failure every bio
 *    mapped so far is unmapped and rq->bio is reset to NULL.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false, map_bvec = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	/* decide between bounce-copying, direct bvec mapping and page mapping */
	if (map_data)
		copy = true;
	else if (blk_queue_may_bounce(q))
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (iov_iter_is_bvec(iter))
		map_bvec = true;
	else if (!user_backed_iter(iter))
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	if (map_bvec) {
		ret = blk_rq_map_user_bvec(rq, iter);
		if (!ret)
			return 0;
		if (ret != -EREMOTEIO)
			goto fail;
		/* fall back to copying the data on limits mismatches */
		copy = true;
	}

	/* work on a private copy so the caller's iterator is left untouched */
	i = *iter;
	do {
		if (copy)
			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
		else
			ret = bio_map_user_iov(rq, &i, gfp_mask);
		if (ret)
			goto unmap_rq;
		/* remember the first bio: it heads the list to unmap on error */
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

/*
 * Single-buffer convenience wrapper: import @ubuf/@len as an iterator in
 * the request's data direction and hand it to blk_rq_map_user_iov().
 */
int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iov_iter i;
	int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

/*
 * Map either an iovec array (@vec true) or a flat user buffer to @req.
 *
 * In the vectored case @ubuf is the iovec array; its element count is
 * @iov_count, or @buf_len when @iov_count is zero.  With a non-zero
 * @iov_count the iterator is truncated to @buf_len, and an empty result
 * is rejected with -EINVAL if @check_iter_count is set.  In the flat
 * case nothing is mapped when @buf_len is zero.  Returns 0 or a negative
 * errno.
 */
int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
		void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
		bool vec, int iov_count, bool check_iter_count, int rw)
{
	int ret = 0;

	if (vec) {
		struct iovec fast_iov[UIO_FASTIOV];
		struct iovec *iov = fast_iov;
		struct iov_iter iter;

		ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
				UIO_FASTIOV, &iov, &iter);
		if (ret < 0)
			return ret;

		if (iov_count) {
			/* SG_IO howto says that the shorter of the two wins */
			iov_iter_truncate(&iter, buf_len);
			if (check_iter_count && !iov_iter_count(&iter)) {
				kfree(iov);
				return -EINVAL;
			}
		}

		ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
				gfp_mask);
		kfree(iov);
	} else if (buf_len) {
		ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
				gfp_mask);
	}
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_io);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio:	       start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
*/ int blk_rq_unmap_user(struct bio *bio) { struct bio *next_bio; int ret = 0, ret2; while (bio) { if (bio->bi_private) { ret2 = bio_uncopy_user(bio); if (ret2 && !ret) ret = ret2; } else { bio_release_pages(bio, bio_data_dir(bio) == READ); } next_bio = bio; bio = bio->bi_next; blk_mq_map_bio_put(next_bio); } return ret; } EXPORT_SYMBOL(blk_rq_unmap_user); /** * blk_rq_map_kern - map kernel data to a request, for passthrough requests * @q: request queue where request should be inserted * @rq: request to fill * @kbuf: the kernel buffer * @len: length of user data * @gfp_mask: memory allocation flags * * Description: * Data will be mapped directly if possible. Otherwise a bounce * buffer is used. Can be called multiple times to append multiple * buffers. */ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; struct bio *bio; int ret; if (len > (queue_max_hw_sectors(q) << 9)) return -EINVAL; if (!len || !kbuf) return -EINVAL; if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) || blk_queue_may_bounce(q)) bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); else bio = bio_map_kern(q, kbuf, len, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= req_op(rq); ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { bio_uninit(bio); kfree(bio); } return ret; } EXPORT_SYMBOL(blk_rq_map_kern); |
5 1 1 5 6 1 2 7 1 6 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 | // SPDX-License-Identifier: GPL-2.0-only /* * 6pack.c This module implements the 6pack protocol for kernel-based * devices like TTY. 
It interfaces between a raw TTY and the * kernel's AX.25 protocol layers. * * Authors: Andreas Könsgen <ajk@comnets.uni-bremen.de> * Ralf Baechle DL5RB <ralf@linux-mips.org> * * Quite a lot of stuff "stolen" by Joerg Reuter from slip.c, written by * * Laurence Culhane, <loz@holmes.demon.co.uk> * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org> */ #include <linux/module.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/timer.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/semaphore.h> #include <linux/refcount.h> #define SIXPACK_VERSION "Revision: 0.3.0" /* sixpack priority commands */ #define SIXP_SEOF 0x40 /* start and end of a 6pack frame */ #define SIXP_TX_URUN 0x48 /* transmit overrun */ #define SIXP_RX_ORUN 0x50 /* receive overrun */ #define SIXP_RX_BUF_OVL 0x58 /* receive buffer overflow */ #define SIXP_CHKSUM 0xFF /* valid checksum of a 6pack frame */ /* masks to get certain bits out of the status bytes sent by the TNC */ #define SIXP_CMD_MASK 0xC0 #define SIXP_CHN_MASK 0x07 #define SIXP_PRIO_CMD_MASK 0x80 #define SIXP_STD_CMD_MASK 0x40 #define SIXP_PRIO_DATA_MASK 0x38 #define SIXP_TX_MASK 0x20 #define SIXP_RX_MASK 0x10 #define SIXP_RX_DCD_MASK 0x18 #define SIXP_LEDS_ON 0x78 #define SIXP_LEDS_OFF 0x60 #define SIXP_CON 0x08 #define SIXP_STA 0x10 #define SIXP_FOUND_TNC 0xe9 #define SIXP_CON_ON 0x68 #define SIXP_DCD_MASK 0x08 #define SIXP_DAMA_OFF 0 /* default level 2 parameters */ #define SIXP_TXDELAY 25 /* 250 ms */ #define SIXP_PERSIST 50 /* in 256ths */ #define SIXP_SLOTTIME 10 /* 100 ms */ #define SIXP_INIT_RESYNC_TIMEOUT (3*HZ/2) /* in 1 s 
*/ #define SIXP_RESYNC_TIMEOUT 5*HZ /* in 1 s */ /* 6pack configuration. */ #define SIXP_NRUNIT 31 /* MAX number of 6pack channels */ #define SIXP_MTU 256 /* Default MTU */ enum sixpack_flags { SIXPF_ERROR, /* Parity, etc. error */ }; struct sixpack { /* Various fields. */ struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ /* These are pointers to the malloc()ed frame buffers. */ unsigned char *rbuff; /* receiver buffer */ int rcount; /* received chars counter */ unsigned char *xbuff; /* transmitter buffer */ unsigned char *xhead; /* next byte to XMIT */ int xleft; /* bytes left in XMIT queue */ unsigned char raw_buf[4]; unsigned char cooked_buf[400]; unsigned int rx_count; unsigned int rx_count_cooked; spinlock_t rxlock; int mtu; /* Our mtu (to spot changes!) */ int buffsize; /* Max buffers sizes */ unsigned long flags; /* Flag values/ mode etc */ unsigned char mode; /* 6pack mode */ /* 6pack stuff */ unsigned char tx_delay; unsigned char persistence; unsigned char slottime; unsigned char duplex; unsigned char led_state; unsigned char status; unsigned char status1; unsigned char status2; unsigned char tx_enable; unsigned char tnc_state; struct timer_list tx_t; struct timer_list resync_t; refcount_t refcnt; struct completion dead; spinlock_t lock; }; #define AX25_6PACK_HEADER_LEN 0 static void sixpack_decode(struct sixpack *, const unsigned char[], int); static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char); /* * Perform the persistence/slottime algorithm for CSMA access. If the * persistence check was successful, write the data to the serial driver. * Note that in case of DAMA operation, the data is not sent here. 
*/
/*
 * Timer callback (also called directly from sp_encaps()).  Each call
 * advances a small static pseudo-random byte and transmits only if the
 * DCD bit in status1 is clear and that byte is below sp->persistence;
 * otherwise it re-arms tx_t for (slottime + 1) hundredths of a second.
 */
static void sp_xmit_on_air(struct timer_list *t)
{
	struct sixpack *sp = from_timer(sp, t, tx_t);
	int actual, when = sp->slottime;
	static unsigned char random;

	random = random * 17 + 41;

	if (((sp->status1 & SIXP_DCD_MASK) == 0) && (random < sp->persistence)) {
		/* LED byte, frame (status2 holds its length), LED byte again */
		sp->led_state = 0x70;
		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
		sp->tx_enable = 1;
		actual = sp->tty->ops->write(sp->tty, sp->xbuff, sp->status2);
		sp->xleft -= actual;
		sp->xhead += actual;
		sp->led_state = 0x60;
		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
		sp->status2 = 0;
	} else
		mod_timer(&sp->tx_t, jiffies + ((when + 1) * HZ) / 100);
}

/* ----> 6pack timer interrupt handler and friends. <---- */

/*
 * Encapsulate one AX.25 frame and stuff into a TTY queue.
 *
 * icp[0] is the KISS command byte: 0 is a data frame, 1..5 set
 * tx_delay, persistence, slottime, (ignored) and duplex from icp[1].
 * Invalid frames are counted as tx_dropped and the queue is restarted.
 * NOTE(review): for commands 1..5 icp[1] is read without checking that
 * len >= 2 - confirm callers always supply it.
 */
static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
{
	unsigned char *msg, *p = icp;
	int actual, count;

	if (len > sp->mtu) {	/* sp->mtu = AX25_MTU = max. PACLEN = 256 */
		msg = "oversized transmit packet!";
		goto out_drop;
	}

	if (p[0] > 5) {
		msg = "invalid KISS command";
		goto out_drop;
	}

	if ((p[0] != 0) && (len > 2)) {
		msg = "KISS control packet too long";
		goto out_drop;
	}

	if ((p[0] == 0) && (len < 15)) {
		msg = "bad AX.25 packet to transmit";
		goto out_drop;
	}

	/* the encoded frame lands in xbuff even for control commands */
	count = encode_sixpack(p, sp->xbuff, len, sp->tx_delay);
	set_bit(TTY_DO_WRITE_WAKEUP, &sp->tty->flags);

	switch (p[0]) {
	case 1:	sp->tx_delay = p[1];
		return;
	case 2:	sp->persistence = p[1];
		return;
	case 3:	sp->slottime = p[1];
		return;
	case 4:	/* ignored */
		return;
	case 5:	sp->duplex = p[1];
		return;
	}

	if (p[0] != 0)
		return;

	/*
	 * In case of fullduplex or DAMA operation, we don't take care about the
	 * state of the DCD or of any timers, as the determination of the
	 * correct time to send is the job of the AX.25 layer. We send
	 * immediately after data has arrived.
	 */
	if (sp->duplex == 1) {
		sp->led_state = 0x70;
		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
		sp->tx_enable = 1;
		actual = sp->tty->ops->write(sp->tty, sp->xbuff, count);
		sp->xleft = count - actual;
		sp->xhead = sp->xbuff + actual;
		sp->led_state = 0x60;
		sp->tty->ops->write(sp->tty, &sp->led_state, 1);
	} else {
		/* queue the frame and let the CSMA routine decide when to send */
		sp->xleft = count;
		sp->xhead = sp->xbuff;
		sp->status2 = count;
		sp_xmit_on_air(&sp->tx_t);
	}

	return;

out_drop:
	sp->dev->stats.tx_dropped++;
	netif_start_queue(sp->dev);
	if (net_ratelimit())
		printk(KERN_DEBUG "%s: %s - dropped.\n", sp->dev->name, msg);
}

/*
 * ndo_start_xmit: IP packets are handed to ax25_ip_xmit(); everything
 * else is encapsulated under sp->lock with the queue stopped.  The skb
 * is always consumed.
 */

static netdev_tx_t sp_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct sixpack *sp = netdev_priv(dev);

	if (skb->protocol == htons(ETH_P_IP))
		return ax25_ip_xmit(skb);

	spin_lock_bh(&sp->lock);
	/* We were not busy, so we are now... :-) */
	netif_stop_queue(dev);
	dev->stats.tx_bytes += skb->len;
	sp_encaps(sp, skb->data, skb->len);
	spin_unlock_bh(&sp->lock);

	dev_kfree_skb(skb);

	return NETDEV_TX_OK;
}

/* ndo_open: succeeds only while a tty is attached to the channel. */
static int sp_open_dev(struct net_device *dev)
{
	struct sixpack *sp = netdev_priv(dev);

	if (sp->tty == NULL)
		return -ENODEV;
	return 0;
}

/* Close the low-level part of the 6pack channel. */
static int sp_close(struct net_device *dev)
{
	struct sixpack *sp = netdev_priv(dev);

	spin_lock_bh(&sp->lock);
	if (sp->tty) {
		/* TTY discipline is running. */
		clear_bit(TTY_DO_WRITE_WAKEUP, &sp->tty->flags);
	}
	netif_stop_queue(dev);
	spin_unlock_bh(&sp->lock);

	return 0;
}

/*
 * ndo_set_mac_address: install the AX.25 callsign from @addr (a
 * struct sockaddr_ax25) while holding the tx and address locks.
 */
static int sp_set_mac_address(struct net_device *dev, void *addr)
{
	struct sockaddr_ax25 *sa = addr;

	netif_tx_lock_bh(dev);
	netif_addr_lock(dev);
	__dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN);
	netif_addr_unlock(dev);
	netif_tx_unlock_bh(dev);

	return 0;
}

static const struct net_device_ops sp_netdev_ops = {
	.ndo_open		= sp_open_dev,
	.ndo_stop		= sp_close,
	.ndo_start_xmit		= sp_xmit,
	.ndo_set_mac_address    = sp_set_mac_address,
};

/* alloc_netdev() setup callback: fill in the AX.25 device defaults. */
static void sp_setup(struct net_device *dev)
{
	/* Finish setting up the DEVICE info. */
	dev->netdev_ops		= &sp_netdev_ops;
	dev->mtu		= SIXP_MTU;
	dev->hard_header_len	= AX25_MAX_HEADER_LEN;
	dev->header_ops 	= &ax25_header_ops;

	dev->addr_len		= AX25_ADDR_LEN;
	dev->type		= ARPHRD_AX25;
	dev->tx_queue_len	= 10;

	/* Only activated in AX.25 mode */
	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
	dev_addr_set(dev, (u8 *)&ax25_defaddr);

	dev->flags		= 0;
}

/* Send one completely decapsulated IP datagram to the IP layer. */

/*
 * This is the routine that sends the received data to the kernel AX.25.
 * 'cmd' is the KISS command. For AX.25 data, it is zero.
*/ static void sp_bump(struct sixpack *sp, char cmd) { struct sk_buff *skb; int count; unsigned char *ptr; count = sp->rcount + 1; sp->dev->stats.rx_bytes += count; if ((skb = dev_alloc_skb(count + 1)) == NULL) goto out_mem; ptr = skb_put(skb, count + 1); *ptr++ = cmd; /* KISS command */ memcpy(ptr, sp->cooked_buf + 1, count); skb->protocol = ax25_type_trans(skb, sp->dev); netif_rx(skb); sp->dev->stats.rx_packets++; return; out_mem: sp->dev->stats.rx_dropped++; } /* ----------------------------------------------------------------------- */ /* * We have a potential race on dereferencing tty->disc_data, because the tty * layer provides no locking at all - thus one cpu could be running * sixpack_receive_buf while another calls sixpack_close, which zeroes * tty->disc_data and frees the memory that sixpack_receive_buf is using. The * best way to fix this is to use a rwlock in the tty struct, but for now we * use a single global rwlock for all ttys in ppp line discipline. */ static DEFINE_RWLOCK(disc_data_lock); static struct sixpack *sp_get(struct tty_struct *tty) { struct sixpack *sp; read_lock(&disc_data_lock); sp = tty->disc_data; if (sp) refcount_inc(&sp->refcnt); read_unlock(&disc_data_lock); return sp; } static void sp_put(struct sixpack *sp) { if (refcount_dec_and_test(&sp->refcnt)) complete(&sp->dead); } /* * Called by the TTY driver when there's room for more data. If we have * more packets to send, we send them here. 
*/ static void sixpack_write_wakeup(struct tty_struct *tty) { struct sixpack *sp = sp_get(tty); int actual; if (!sp) return; if (sp->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sp->dev->stats.tx_packets++; clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); sp->tx_enable = 0; netif_wake_queue(sp->dev); goto out; } if (sp->tx_enable) { actual = tty->ops->write(tty, sp->xhead, sp->xleft); sp->xleft -= actual; sp->xhead += actual; } out: sp_put(sp); } /* ----------------------------------------------------------------------- */ /* * Handle the 'receiver data ready' interrupt. * This function is called by the tty module in the kernel when * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ static void sixpack_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct sixpack *sp; int count1; if (!count) return; sp = sp_get(tty); if (!sp) return; /* Read the characters out of the buffer */ count1 = count; while (count) { count--; if (fp && *fp++) { if (!test_and_set_bit(SIXPF_ERROR, &sp->flags)) sp->dev->stats.rx_errors++; continue; } } sixpack_decode(sp, cp, count1); sp_put(sp); tty_unthrottle(tty); } /* * Try to resync the TNC. Called by the resync timer defined in * decode_prio_command */ #define TNC_UNINITIALIZED 0 #define TNC_UNSYNC_STARTUP 1 #define TNC_UNSYNCED 2 #define TNC_IN_SYNC 3 static void __tnc_set_sync_state(struct sixpack *sp, int new_tnc_state) { char *msg; switch (new_tnc_state) { default: /* gcc oh piece-o-crap ... 
*/ case TNC_UNSYNC_STARTUP: msg = "Synchronizing with TNC"; break; case TNC_UNSYNCED: msg = "Lost synchronization with TNC\n"; break; case TNC_IN_SYNC: msg = "Found TNC"; break; } sp->tnc_state = new_tnc_state; printk(KERN_INFO "%s: %s\n", sp->dev->name, msg); } static inline void tnc_set_sync_state(struct sixpack *sp, int new_tnc_state) { int old_tnc_state = sp->tnc_state; if (old_tnc_state != new_tnc_state) __tnc_set_sync_state(sp, new_tnc_state); } static void resync_tnc(struct timer_list *t) { struct sixpack *sp = from_timer(sp, t, resync_t); static char resync_cmd = 0xe8; /* clear any data that might have been received */ sp->rx_count = 0; sp->rx_count_cooked = 0; /* reset state machine */ sp->status = 1; sp->status1 = 1; sp->status2 = 0; /* resync the TNC */ sp->led_state = 0x60; sp->tty->ops->write(sp->tty, &sp->led_state, 1); sp->tty->ops->write(sp->tty, &resync_cmd, 1); /* Start resync timer again -- the TNC might be still absent */ mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT); } static inline int tnc_init(struct sixpack *sp) { unsigned char inbyte = 0xe8; tnc_set_sync_state(sp, TNC_UNSYNC_STARTUP); sp->tty->ops->write(sp->tty, &inbyte, 1); mod_timer(&sp->resync_t, jiffies + SIXP_RESYNC_TIMEOUT); return 0; } /* * Open the high-level part of the 6pack channel. * This function is called by the TTY module when the * 6pack line discipline is called for. Because we are * sure the tty line exists, we only have to link it to * a free 6pcack channel... 
*/
static int sixpack_open(struct tty_struct *tty)
{
	char *rbuff = NULL, *xbuff = NULL;
	struct net_device *dev;
	struct sixpack *sp;
	unsigned long len;
	int err = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (tty->ops->write == NULL)
		return -EOPNOTSUPP;

	/* the channel state lives in the private area of the netdev */
	dev = alloc_netdev(sizeof(struct sixpack), "sp%d", NET_NAME_UNKNOWN,
			   sp_setup);
	if (!dev) {
		err = -ENOMEM;
		goto out;
	}

	sp = netdev_priv(dev);
	sp->dev = dev;

	spin_lock_init(&sp->lock);
	spin_lock_init(&sp->rxlock);
	refcount_set(&sp->refcnt, 1);
	init_completion(&sp->dead);

	/* !!! length of the buffers. MTU is IP MTU, not PACLEN!  */

	len = dev->mtu * 2;

	rbuff = kmalloc(len + 4, GFP_KERNEL);
	xbuff = kmalloc(len + 4, GFP_KERNEL);

	if (rbuff == NULL || xbuff == NULL) {
		err = -ENOBUFS;
		goto out_free;
	}

	spin_lock_bh(&sp->lock);

	sp->tty = tty;

	sp->rbuff	= rbuff;
	sp->xbuff	= xbuff;

	sp->mtu		= AX25_MTU + 73;
	sp->buffsize	= len;
	sp->rcount	= 0;
	sp->rx_count	= 0;
	sp->rx_count_cooked = 0;
	sp->xleft	= 0;

	sp->flags	= 0;		/* Clear ESCAPE & ERROR flags */

	sp->duplex	= 0;
	sp->tx_delay    = SIXP_TXDELAY;
	sp->persistence = SIXP_PERSIST;
	sp->slottime    = SIXP_SLOTTIME;
	sp->led_state   = 0x60;
	sp->status      = 1;
	sp->status1     = 1;
	sp->status2     = 0;
	sp->tx_enable   = 0;

	netif_start_queue(dev);

	timer_setup(&sp->tx_t, sp_xmit_on_air, 0);

	timer_setup(&sp->resync_t, resync_tnc, 0);

	spin_unlock_bh(&sp->lock);

	/* Done.  We have linked the TTY line to a channel. */
	tty->disc_data = sp;
	tty->receive_room = 65536;

	/* Now we're ready to register. */
	err = register_netdev(dev);
	if (err) {
		/*
		 * sp lives inside dev, which is freed below: unpublish it so
		 * the tty is not left holding a dangling disc_data pointer.
		 */
		write_lock_irq(&disc_data_lock);
		tty->disc_data = NULL;
		write_unlock_irq(&disc_data_lock);
		goto out_free;
	}

	tnc_init(sp);

	return 0;

out_free:
	/* kfree(NULL) is a no-op, so a partial allocation is handled too */
	kfree(xbuff);
	kfree(rbuff);

	free_netdev(dev);

out:
	return err;
}


/*
 * Close down a 6pack channel.
 * This means flushing out any pending queues, and then restoring the
 * TTY line discipline to what it was before it got hooked to 6pack
 * (which usually is TTY again).
*/
static void sixpack_close(struct tty_struct *tty)
{
	struct sixpack *sp;

	/* detach the channel from the tty so no new user can find it */
	write_lock_irq(&disc_data_lock);
	sp = tty->disc_data;
	tty->disc_data = NULL;
	write_unlock_irq(&disc_data_lock);
	if (!sp)
		return;

	/*
	 * We have now ensured that nobody can start using sp from now on, but
	 * we have to wait for all existing users to finish.
	 */
	if (!refcount_dec_and_test(&sp->refcnt))
		wait_for_completion(&sp->dead);

	/* We must stop the queue to avoid potentially scribbling
	 * on the free buffers. The sp->dead completion is not sufficient
	 * to protect us from sp->xbuff access.
	 */
	netif_stop_queue(sp->dev);

	unregister_netdev(sp->dev);

	del_timer_sync(&sp->tx_t);
	del_timer_sync(&sp->resync_t);

	/* Free all 6pack frame buffers after unreg. */
	kfree(sp->rbuff);
	kfree(sp->xbuff);

	free_netdev(sp->dev);
}

/*
 * Perform I/O control on an active 6pack channel.
 *
 * Handles SIOCGIFNAME, SIOCGIFENCAP, SIOCSIFENCAP and SIOCSIFHWADDR;
 * anything else is passed to tty_mode_ioctl().  Returns -ENXIO when no
 * channel is attached to @tty.
 * NOTE(review): err is declared unsigned int although it carries
 * negative errno values; the int return converts it back - confirm this
 * is intentional.
 */
static int sixpack_ioctl(struct tty_struct *tty, unsigned int cmd,
		unsigned long arg)
{
	struct sixpack *sp = sp_get(tty);
	struct net_device *dev;
	unsigned int tmp, err;

	if (!sp)
		return -ENXIO;
	dev = sp->dev;

	switch(cmd) {
	case SIOCGIFNAME:
		/* copy the interface name out, including its terminator */
		err = copy_to_user((void __user *) arg, dev->name,
		                   strlen(dev->name) + 1) ? -EFAULT : 0;
		break;

	case SIOCGIFENCAP:
		err = put_user(0, (int __user *) arg);
		break;

	case SIOCSIFENCAP:
		if (get_user(tmp, (int __user *) arg)) {
			err = -EFAULT;
			break;
		}

		sp->mode = tmp;
		dev->addr_len        = AX25_ADDR_LEN;
		dev->hard_header_len = AX25_KISS_HEADER_LEN +
		                       AX25_MAX_HEADER_LEN + 3;
		dev->type            = ARPHRD_AX25;

		err = 0;
		break;

	case SIOCSIFHWADDR: {
			char addr[AX25_ADDR_LEN];

			if (copy_from_user(&addr,
			                   (void __user *)arg, AX25_ADDR_LEN)) {
				err = -EFAULT;
				break;
			}

			netif_tx_lock_bh(dev);
			__dev_addr_set(dev, &addr, AX25_ADDR_LEN);
			netif_tx_unlock_bh(dev);
			err = 0;
			break;
		}
	default:
		err = tty_mode_ioctl(tty, cmd, arg);
	}

	sp_put(sp);

	return err;
}

/* Line discipline operations registered for N_6PACK. */
static struct tty_ldisc_ops sp_ldisc = {
	.owner		= THIS_MODULE,
	.num		= N_6PACK,
	.name		= "6pack",
	.open		= sixpack_open,
	.close		= sixpack_close,
	.ioctl		= sixpack_ioctl,
	.receive_buf	= sixpack_receive_buf,
	.write_wakeup	= sixpack_write_wakeup,
};

/* Initialize 6pack control device -- register 6pack line discipline */

static const char msg_banner[]  __initconst = KERN_INFO \
	"AX.25: 6pack driver, " SIXPACK_VERSION "\n";
static const char msg_regfail[] __initconst = KERN_ERR  \
	"6pack: can't register line discipline (err = %d)\n";

/* Module init: print the banner and register the line discipline. */
static int __init sixpack_init_driver(void)
{
	int status;

	printk(msg_banner);

	/* Register the provided line protocol discipline */
	status = tty_register_ldisc(&sp_ldisc);
	if (status)
		printk(msg_regfail, status);

	return status;
}

/* Module exit: unregister the line discipline. */
static void __exit sixpack_exit_driver(void)
{
	tty_unregister_ldisc(&sp_ldisc);
}

/*
 * Encode an AX.25 packet into 6pack.
 *
 * @tx_buf: source frame of @length bytes; its first byte is not copied,
 *          @tx_delay is sent in its place
 * @tx_buf_raw: output buffer for the encoded frame
 *
 * The output starts with a TX priority command byte and SIXP_SEOF, then
 * carries the payload plus one checksum byte (0xff minus the byte sum)
 * packed three input bytes into four output bytes, and ends with
 * SIXP_SEOF.  Returns the number of bytes written to @tx_buf_raw.
 * The local staging buffer holds 400 bytes, so @length must stay below
 * that.
 */

static int encode_sixpack(unsigned char *tx_buf, unsigned char *tx_buf_raw,
	int length, unsigned char tx_delay)
{
	int count = 0;
	unsigned char checksum = 0, buf[400];
	int raw_count = 0;

	tx_buf_raw[raw_count++] = SIXP_PRIO_CMD_MASK | SIXP_TX_MASK;
	tx_buf_raw[raw_count++] = SIXP_SEOF;

	buf[0] = tx_delay;
	for (count = 1; count < length; count++)
		buf[count] = tx_buf[count];

	for (count = 0; count < length; count++)
		checksum += buf[count];
	buf[length] = (unsigned char) 0xff - checksum;

	/* count <= length: the checksum byte is encoded as well */
	for (count = 0; count <= length; count++) {
		if ((count % 3) == 0) {
			tx_buf_raw[raw_count++] = (buf[count] & 0x3f);
			tx_buf_raw[raw_count] = ((buf[count] >> 2) & 0x30);
		} else if ((count % 3) == 1) {
			tx_buf_raw[raw_count++] |= (buf[count] & 0x0f);
			tx_buf_raw[raw_count] =	((buf[count] >> 2) & 0x3c);
		} else {
			tx_buf_raw[raw_count++] |= (buf[count] & 0x03);
			tx_buf_raw[raw_count++] = (buf[count] >> 2);
		}
	}
	/* a partially filled output byte is still pending: account for it */
	if ((length % 3) != 2)
		raw_count++;
	tx_buf_raw[raw_count++] = SIXP_SEOF;
	return raw_count;
}

/*
 * Decode 4 sixpack-encoded bytes into 3 data bytes.
 *
 * The first three bytes of a group are only stored in raw_buf; the
 * fourth triggers decoding into cooked_buf.  A group that would not fit
 * into cooked_buf is dropped with an error message.
 */

static void decode_data(struct sixpack *sp, unsigned char inbyte)
{
	unsigned char *buf;

	if (sp->rx_count != 3) {
		sp->raw_buf[sp->rx_count++] = inbyte;

		return;
	}

	if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) {
		pr_err("6pack: cooked buffer overrun, data loss\n");
		sp->rx_count = 0;
		return;
	}

	buf = sp->raw_buf;
	sp->cooked_buf[sp->rx_count_cooked++] =
		buf[0] | ((buf[1] << 2) & 0xc0);
	sp->cooked_buf[sp->rx_count_cooked++] =
		(buf[1] & 0x0f) | ((buf[2] << 2) & 0xf0);
	sp->cooked_buf[sp->rx_count_cooked++] =
		(buf[2] & 0x03) | (inbyte << 2);
	sp->rx_count = 0;
}

/*
 * Identify and execute a 6pack priority command byte.
 *
 * Updates sp->status / sp->status1 from the data bits of @cmd, sends a
 * pending frame when the TNC is idle in duplex mode, always writes the
 * LED state byte back to the TNC, and re-arms the resync timer while
 * the TNC is in sync.
 */

static void decode_prio_command(struct sixpack *sp, unsigned char cmd)
{
	int actual;

	if ((cmd & SIXP_PRIO_DATA_MASK) != 0) {     /* idle ? */

	/* RX and DCD flags can only be set in the same prio command,
	   if the DCD flag has been set without the RX flag in the previous
	   prio command. If DCD has not been set before, something in the
	   transmission has gone wrong. In this case, RX and DCD are
	   cleared in order to prevent the decode_data routine from
	   reading further data that might be corrupt. */

		if (((sp->status & SIXP_DCD_MASK) == 0) &&
			((cmd & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK)) {
				if (sp->status != 1)
					printk(KERN_DEBUG "6pack: protocol violation\n");
				else
					sp->status = 0;
				cmd &= ~SIXP_RX_DCD_MASK;
		}
		sp->status = cmd & SIXP_PRIO_DATA_MASK;
	} else { /* output watchdog char if idle */
		if ((sp->status2 != 0) && (sp->duplex == 1)) {
			sp->led_state = 0x70;
			sp->tty->ops->write(sp->tty, &sp->led_state, 1);
			sp->tx_enable = 1;
			actual = sp->tty->ops->write(sp->tty, sp->xbuff, sp->status2);
			sp->xleft -= actual;
			sp->xhead += actual;
			sp->led_state = 0x60;
			sp->status2 = 0;

		}
	}

	/* needed to trigger the TNC watchdog */
	sp->tty->ops->write(sp->tty, &sp->led_state, 1);

        /* if the state byte has been received, the TNC is present,
           so the resync timer can be reset. */

	if (sp->tnc_state == TNC_IN_SYNC)
		mod_timer(&sp->resync_t, jiffies + SIXP_INIT_RESYNC_TIMEOUT);

	sp->status1 = cmd & SIXP_PRIO_DATA_MASK;
}

/* identify and execute a standard 6pack command byte */

static void decode_std_command(struct sixpack *sp, unsigned char cmd)
{
	unsigned char checksum = 0, rest = 0;
	short i;

	switch (cmd & SIXP_CMD_MASK) {     /* normal command */
	case SIXP_SEOF:
		if ((sp->rx_count == 0) && (sp->rx_count_cooked == 0)) {
			if ((sp->status & SIXP_RX_DCD_MASK) ==
				SIXP_RX_DCD_MASK) {
				sp->led_state = 0x68;
				sp->tty->ops->write(sp->tty, &sp->led_state, 1);
			}
		} else {
			sp->led_state = 0x60;
			/* fill trailing bytes with zeroes */
			sp->tty->ops->write(sp->tty, &sp->led_state, 1);
			spin_lock_bh(&sp->rxlock);
			rest = sp->rx_count;
			if (rest != 0)
				 for (i = rest; i <= 3; i++)
					decode_data(sp, 0);
			if (rest == 2)
				sp->rx_count_cooked -= 2;
			else if (rest == 3)
				sp->rx_count_cooked -= 1;
			for (i = 0; i < sp->rx_count_cooked; i++)
				checksum += sp->cooked_buf[i];
			if (checksum != SIXP_CHKSUM) {
				printk(KERN_DEBUG "6pack: bad checksum %2.2x\n", checksum);
			} else {
				sp->rcount = sp->rx_count_cooked-2;
				sp_bump(sp, 0);
			}
			sp->rx_count_cooked = 0;
			spin_unlock_bh(&sp->rxlock);
		}
		break;
	case SIXP_TX_URUN: printk(KERN_DEBUG "6pack: TX
underrun\n"); break; case SIXP_RX_ORUN: printk(KERN_DEBUG "6pack: RX overrun\n"); break; case SIXP_RX_BUF_OVL: printk(KERN_DEBUG "6pack: RX buffer overflow\n"); } } /* decode a 6pack packet */ static void sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count) { unsigned char inbyte; int count1; for (count1 = 0; count1 < count; count1++) { inbyte = pre_rbuff[count1]; if (inbyte == SIXP_FOUND_TNC) { tnc_set_sync_state(sp, TNC_IN_SYNC); del_timer(&sp->resync_t); } if ((inbyte & SIXP_PRIO_CMD_MASK) != 0) decode_prio_command(sp, inbyte); else if ((inbyte & SIXP_STD_CMD_MASK) != 0) decode_std_command(sp, inbyte); else if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK) { spin_lock_bh(&sp->rxlock); decode_data(sp, inbyte); spin_unlock_bh(&sp->rxlock); } } } MODULE_AUTHOR("Ralf Baechle DO1GRB <ralf@linux-mips.org>"); MODULE_DESCRIPTION("6pack driver for AX.25"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_6PACK); module_init(sixpack_init_driver); module_exit(sixpack_exit_driver); |
7 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. 
*/ #ifndef BTRFS_TRANSACTION_H #define BTRFS_TRANSACTION_H #include <linux/refcount.h> #include "btrfs_inode.h" #include "delayed-ref.h" #include "ctree.h" #include "misc.h" enum btrfs_trans_state { TRANS_STATE_RUNNING, TRANS_STATE_COMMIT_PREP, TRANS_STATE_COMMIT_START, TRANS_STATE_COMMIT_DOING, TRANS_STATE_UNBLOCKED, TRANS_STATE_SUPER_COMMITTED, TRANS_STATE_COMPLETED, TRANS_STATE_MAX, }; #define BTRFS_TRANS_HAVE_FREE_BGS 0 #define BTRFS_TRANS_DIRTY_BG_RUN 1 #define BTRFS_TRANS_CACHE_ENOSPC 2 struct btrfs_transaction { u64 transid; /* * total external writers(USERSPACE/START/ATTACH) in this * transaction, it must be zero before the transaction is * being committed */ atomic_t num_extwriters; /* * total writers in this transaction, it must be zero before the * transaction can end */ atomic_t num_writers; refcount_t use_count; unsigned long flags; /* Be protected by fs_info->trans_lock when we want to change it. */ enum btrfs_trans_state state; int aborted; struct list_head list; struct extent_io_tree dirty_pages; time64_t start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; struct list_head dev_update_list; struct list_head switch_commits; struct list_head dirty_bgs; /* * There is no explicit lock which protects io_bgs, rather its * consistency is implied by the fact that all the sites which modify * it do so under some form of transaction critical section, namely: * * - btrfs_start_dirty_block_groups - This function can only ever be * run by one of the transaction committers. 
Refer to * BTRFS_TRANS_DIRTY_BG_RUN usage in btrfs_commit_transaction * * - btrfs_write_dirty_blockgroups - this is called by * commit_cowonly_roots from transaction critical section * (TRANS_STATE_COMMIT_DOING) * * - btrfs_cleanup_dirty_bgs - called on transaction abort */ struct list_head io_bgs; struct list_head dropped_roots; struct extent_io_tree pinned_extents; /* * we need to make sure block group deletion doesn't race with * free space cache writeout. This mutex keeps them from stomping * on each other */ struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; /* Protected by spin lock fs_info->unused_bgs_lock. */ struct list_head deleted_bgs; spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; struct btrfs_fs_info *fs_info; /* * Number of ordered extents the transaction must wait for before * committing. These are ordered extents started by a fast fsync. */ atomic_t pending_ordered; wait_queue_head_t pending_wait; }; enum { ENUM_BIT(__TRANS_FREEZABLE), ENUM_BIT(__TRANS_START), ENUM_BIT(__TRANS_ATTACH), ENUM_BIT(__TRANS_JOIN), ENUM_BIT(__TRANS_JOIN_NOLOCK), ENUM_BIT(__TRANS_DUMMY), ENUM_BIT(__TRANS_JOIN_NOSTART), }; #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) #define TRANS_ATTACH (__TRANS_ATTACH) #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) #define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART) #define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH) struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; u64 delayed_refs_bytes_reserved; u64 chunk_bytes_reserved; unsigned long delayed_ref_updates; unsigned long delayed_ref_csum_deletions; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; /* Set by a task that wants to create a snapshot. 
*/ struct btrfs_pending_snapshot *pending_snapshot; refcount_t use_count; unsigned int type; /* * Error code of transaction abort, set outside of locks and must use * the READ_ONCE/WRITE_ONCE access */ short aborted; bool adding_csums; bool allocating_chunk; bool removing_chunk; bool reloc_reserved; bool in_fsync; struct btrfs_fs_info *fs_info; struct list_head new_bgs; struct btrfs_block_rsv delayed_rsv; }; /* * The abort status can be changed between calls and is not protected by locks. * This accepts btrfs_transaction and btrfs_trans_handle as types. Once it's * set to a non-zero value it does not change, so the macro should be in checks * but is not necessary for further reads of the value. */ #define TRANS_ABORTED(trans) (unlikely(READ_ONCE((trans)->aborted))) struct btrfs_pending_snapshot { struct dentry *dentry; struct inode *dir; struct btrfs_root *root; struct btrfs_root_item *root_item; struct btrfs_root *snap; struct btrfs_qgroup_inherit *inherit; struct btrfs_path *path; /* block reservation for the operation */ struct btrfs_block_rsv block_rsv; /* extra metadata reservation for relocation */ int error; /* Preallocated anonymous block device number */ dev_t anon_dev; bool readonly; struct list_head list; }; static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { spin_lock(&inode->lock); inode->last_trans = trans->transaction->transid; inode->last_sub_trans = btrfs_get_root_log_transid(inode->root); inode->last_log_commit = inode->last_sub_trans - 1; spin_unlock(&inode->lock); } /* * Make qgroup codes to skip given qgroupid, means the old/new_roots for * qgroup won't contain the qgroupid in it. 
*/ static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_delayed_ref_root *delayed_refs; delayed_refs = &trans->transaction->delayed_refs; WARN_ON(delayed_refs->qgroup_to_skip); delayed_refs->qgroup_to_skip = qgroupid; } static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans) { struct btrfs_delayed_ref_root *delayed_refs; delayed_refs = &trans->transaction->delayed_refs; WARN_ON(!delayed_refs->qgroup_to_skip); delayed_refs->qgroup_to_skip = 0; } bool __cold abort_should_print_stack(int error); /* * Call btrfs_abort_transaction as early as possible when an error condition is * detected, that way the exact stack trace is reported for some errors. */ #define btrfs_abort_transaction(trans, error) \ do { \ bool first = false; \ /* Report first abort since mount */ \ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ &((trans)->fs_info->fs_state))) { \ first = true; \ if (WARN(abort_should_print_stack(error), \ KERN_ERR \ "BTRFS: Transaction aborted (error %d)\n", \ (error))) { \ /* Stack trace printed. 
*/ \ } else { \ btrfs_err((trans)->fs_info, \ "Transaction aborted (error %d)", \ (error)); \ } \ } \ __btrfs_abort_transaction((trans), __func__, \ __LINE__, (error), first); \ } while (0) int btrfs_end_transaction(struct btrfs_trans_handle *trans); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, unsigned int num_items); struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( struct btrfs_root *root, unsigned int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction_barrier( struct btrfs_root *root); int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid); void btrfs_add_dead_root(struct btrfs_root *root); void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info); int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info); int btrfs_commit_transaction(struct btrfs_trans_handle *trans); void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans); void btrfs_throttle(struct btrfs_fs_info *fs_info); int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info, struct extent_io_tree *dirty_pages, int mark); int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark); int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); void btrfs_put_transaction(struct btrfs_transaction *transaction); void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, struct 
btrfs_root *root); void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans, const char *function, unsigned int line, int error, bool first_hit); int __init btrfs_transaction_init(void); void __cold btrfs_transaction_exit(void); #endif |
53 8 264 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | // SPDX-License-Identifier: GPL-2.0-only /* * Unified UUID/GUID definition * * Copyright (C) 2009, 2016 Intel Corp. * Huang Ying <ying.huang@intel.com> */ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/uuid.h> #include <linux/random.h> const guid_t guid_null; EXPORT_SYMBOL(guid_null); const uuid_t uuid_null; EXPORT_SYMBOL(uuid_null); const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; /** * generate_random_uuid - generate a random UUID * @uuid: where to put the generated UUID * * Random UUID interface * * Used to create a Boot ID or a filesystem UUID/GUID, but can be * useful for other kernel drivers. 
*/ void generate_random_uuid(unsigned char uuid[16]) { get_random_bytes(uuid, 16); /* Set UUID version to 4 --- truly random generation */ uuid[6] = (uuid[6] & 0x0F) | 0x40; /* Set the UUID variant to DCE */ uuid[8] = (uuid[8] & 0x3F) | 0x80; } EXPORT_SYMBOL(generate_random_uuid); void generate_random_guid(unsigned char guid[16]) { get_random_bytes(guid, 16); /* Set GUID version to 4 --- truly random generation */ guid[7] = (guid[7] & 0x0F) | 0x40; /* Set the GUID variant to DCE */ guid[8] = (guid[8] & 0x3F) | 0x80; } EXPORT_SYMBOL(generate_random_guid); static void __uuid_gen_common(__u8 b[16]) { get_random_bytes(b, 16); /* reversion 0b10 */ b[8] = (b[8] & 0x3F) | 0x80; } void guid_gen(guid_t *lu) { __uuid_gen_common(lu->b); /* version 4 : random generation */ lu->b[7] = (lu->b[7] & 0x0F) | 0x40; } EXPORT_SYMBOL_GPL(guid_gen); void uuid_gen(uuid_t *bu) { __uuid_gen_common(bu->b); /* version 4 : random generation */ bu->b[6] = (bu->b[6] & 0x0F) | 0x40; } EXPORT_SYMBOL_GPL(uuid_gen); /** * uuid_is_valid - checks if a UUID string is valid * @uuid: UUID string to check * * Description: * It checks if the UUID string is following the format: * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx * * where x is a hex digit. * * Return: true if input is valid UUID string. 
*/ bool uuid_is_valid(const char *uuid) { unsigned int i; for (i = 0; i < UUID_STRING_LEN; i++) { if (i == 8 || i == 13 || i == 18 || i == 23) { if (uuid[i] != '-') return false; } else if (!isxdigit(uuid[i])) { return false; } } return true; } EXPORT_SYMBOL(uuid_is_valid); static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16]) { static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34}; unsigned int i; if (!uuid_is_valid(uuid)) return -EINVAL; for (i = 0; i < 16; i++) { int hi = hex_to_bin(uuid[si[i] + 0]); int lo = hex_to_bin(uuid[si[i] + 1]); b[ei[i]] = (hi << 4) | lo; } return 0; } int guid_parse(const char *uuid, guid_t *u) { return __uuid_parse(uuid, u->b, guid_index); } EXPORT_SYMBOL(guid_parse); int uuid_parse(const char *uuid, uuid_t *u) { return __uuid_parse(uuid, u->b, uuid_index); } EXPORT_SYMBOL(uuid_parse); |
39 39 39 36 3 2 37 38 39 39 39 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | /* * Generic fillrect for frame buffers in system RAM with packed pixels of * any depth. * * Based almost entirely from cfbfillrect.c (which is based almost entirely * on Geert Uytterhoeven's fillrect routine) * * Copyright (C) 2007 Antonino Daplas <adaplas@pol.net> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details. 
*/ #include <linux/module.h> #include <linux/string.h> #include <linux/fb.h> #include <asm/types.h> #include "fb_draw.h" /* * Aligned pattern fill using 32/64-bit memory accesses */ static void bitfill_aligned(struct fb_info *p, unsigned long *dst, int dst_idx, unsigned long pat, unsigned n, int bits) { unsigned long first, last; if (!n) return; first = FB_SHIFT_HIGH(p, ~0UL, dst_idx); last = ~(FB_SHIFT_HIGH(p, ~0UL, (dst_idx+n) % bits)); if (dst_idx+n <= bits) { /* Single word */ if (last) first &= last; *dst = comp(pat, *dst, first); } else { /* Multiple destination words */ /* Leading bits */ if (first!= ~0UL) { *dst = comp(pat, *dst, first); dst++; n -= bits - dst_idx; } /* Main chunk */ n /= bits; memset_l(dst, pat, n); dst += n; /* Trailing bits */ if (last) *dst = comp(pat, *dst, last); } } /* * Unaligned generic pattern fill using 32/64-bit memory accesses * The pattern must have been expanded to a full 32/64-bit value * Left/right are the appropriate shifts to convert to the pattern to be * used for the next 32/64-bit word */ static void bitfill_unaligned(struct fb_info *p, unsigned long *dst, int dst_idx, unsigned long pat, int left, int right, unsigned n, int bits) { unsigned long first, last; if (!n) return; first = FB_SHIFT_HIGH(p, ~0UL, dst_idx); last = ~(FB_SHIFT_HIGH(p, ~0UL, (dst_idx+n) % bits)); if (dst_idx+n <= bits) { /* Single word */ if (last) first &= last; *dst = comp(pat, *dst, first); } else { /* Multiple destination words */ /* Leading bits */ if (first) { *dst = comp(pat, *dst, first); dst++; pat = pat << left | pat >> right; n -= bits - dst_idx; } /* Main chunk */ n /= bits; while (n >= 4) { *dst++ = pat; pat = pat << left | pat >> right; *dst++ = pat; pat = pat << left | pat >> right; *dst++ = pat; pat = pat << left | pat >> right; *dst++ = pat; pat = pat << left | pat >> right; n -= 4; } while (n--) { *dst++ = pat; pat = pat << left | pat >> right; } /* Trailing bits */ if (last) *dst = comp(pat, *dst, last); } } /* * Aligned pattern 
invert using 32/64-bit memory accesses */ static void bitfill_aligned_rev(struct fb_info *p, unsigned long *dst, int dst_idx, unsigned long pat, unsigned n, int bits) { unsigned long val = pat; unsigned long first, last; if (!n) return; first = FB_SHIFT_HIGH(p, ~0UL, dst_idx); last = ~(FB_SHIFT_HIGH(p, ~0UL, (dst_idx+n) % bits)); if (dst_idx+n <= bits) { /* Single word */ if (last) first &= last; *dst = comp(*dst ^ val, *dst, first); } else { /* Multiple destination words */ /* Leading bits */ if (first!=0UL) { *dst = comp(*dst ^ val, *dst, first); dst++; n -= bits - dst_idx; } /* Main chunk */ n /= bits; while (n >= 8) { *dst++ ^= val; *dst++ ^= val; *dst++ ^= val; *dst++ ^= val; *dst++ ^= val; *dst++ ^= val; *dst++ ^= val; *dst++ ^= val; n -= 8; } while (n--) *dst++ ^= val; /* Trailing bits */ if (last) *dst = comp(*dst ^ val, *dst, last); } } /* * Unaligned generic pattern invert using 32/64-bit memory accesses * The pattern must have been expanded to a full 32/64-bit value * Left/right are the appropriate shifts to convert to the pattern to be * used for the next 32/64-bit word */ static void bitfill_unaligned_rev(struct fb_info *p, unsigned long *dst, int dst_idx, unsigned long pat, int left, int right, unsigned n, int bits) { unsigned long first, last; if (!n) return; first = FB_SHIFT_HIGH(p, ~0UL, dst_idx); last = ~(FB_SHIFT_HIGH(p, ~0UL, (dst_idx+n) % bits)); if (dst_idx+n <= bits) { /* Single word */ if (last) first &= last; *dst = comp(*dst ^ pat, *dst, first); } else { /* Multiple destination words */ /* Leading bits */ if (first != 0UL) { *dst = comp(*dst ^ pat, *dst, first); dst++; pat = pat << left | pat >> right; n -= bits - dst_idx; } /* Main chunk */ n /= bits; while (n >= 4) { *dst++ ^= pat; pat = pat << left | pat >> right; *dst++ ^= pat; pat = pat << left | pat >> right; *dst++ ^= pat; pat = pat << left | pat >> right; *dst++ ^= pat; pat = pat << left | pat >> right; n -= 4; } while (n--) { *dst ^= pat; pat = pat << left | pat >> right; } /* 
Trailing bits */ if (last) *dst = comp(*dst ^ pat, *dst, last); } } void sys_fillrect(struct fb_info *p, const struct fb_fillrect *rect) { unsigned long pat, pat2, fg; unsigned long width = rect->width, height = rect->height; int bits = BITS_PER_LONG, bytes = bits >> 3; u32 bpp = p->var.bits_per_pixel; unsigned long *dst; int dst_idx, left; if (p->state != FBINFO_STATE_RUNNING) return; if (p->fix.visual == FB_VISUAL_TRUECOLOR || p->fix.visual == FB_VISUAL_DIRECTCOLOR ) fg = ((u32 *) (p->pseudo_palette))[rect->color]; else fg = rect->color; pat = pixel_to_pat( bpp, fg); dst = (unsigned long *)((unsigned long)p->screen_base & ~(bytes-1)); dst_idx = ((unsigned long)p->screen_base & (bytes - 1))*8; dst_idx += rect->dy*p->fix.line_length*8+rect->dx*bpp; /* FIXME For now we support 1-32 bpp only */ left = bits % bpp; if (p->fbops->fb_sync) p->fbops->fb_sync(p); if (!left) { void (*fill_op32)(struct fb_info *p, unsigned long *dst, int dst_idx, unsigned long pat, unsigned n, int bits) = NULL; switch (rect->rop) { case ROP_XOR: fill_op32 = bitfill_aligned_rev; break; case ROP_COPY: fill_op32 = bitfill_aligned; break; default: printk( KERN_ERR "cfb_fillrect(): unknown rop, " "defaulting to ROP_COPY\n"); fill_op32 = bitfill_aligned; break; } while (height--) { dst += dst_idx >> (ffs(bits) - 1); dst_idx &= (bits - 1); fill_op32(p, dst, dst_idx, pat, width*bpp, bits); dst_idx += p->fix.line_length*8; } } else { int right, r; void (*fill_op)(struct fb_info *p, unsigned long *dst, int dst_idx, unsigned long pat, int left, int right, unsigned n, int bits) = NULL; #ifdef __LITTLE_ENDIAN right = left; left = bpp - right; #else right = bpp - left; #endif switch (rect->rop) { case ROP_XOR: fill_op = bitfill_unaligned_rev; break; case ROP_COPY: fill_op = bitfill_unaligned; break; default: printk(KERN_ERR "sys_fillrect(): unknown rop, " "defaulting to ROP_COPY\n"); fill_op = bitfill_unaligned; break; } while (height--) { dst += dst_idx / bits; dst_idx &= (bits - 1); r = dst_idx % bpp; 
/* rotate pattern to the correct start position */ pat2 = le_long_to_cpu(rolx(cpu_to_le_long(pat), r, bpp)); fill_op(p, dst, dst_idx, pat2, left, right, width*bpp, bits); dst_idx += p->fix.line_length*8; } } } EXPORT_SYMBOL(sys_fillrect); MODULE_AUTHOR("Antonino Daplas <adaplas@pol.net>"); MODULE_DESCRIPTION("Generic fill rectangle (sys-to-sys)"); MODULE_LICENSE("GPL"); |
6 8 8 3 5 2 8 8 8 8 8 8 8 8 6 3 8 1 4 8 4 1 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 | // SPDX-License-Identifier: GPL-2.0-or-later /* procfs files for key database enumeration * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <asm/errno.h> #include "internal.h" static void *proc_keys_start(struct seq_file *p, loff_t *_pos); static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos); static void proc_keys_stop(struct seq_file *p, void *v); static int proc_keys_show(struct seq_file *m, void *v); static const struct seq_operations proc_keys_ops = { .start = proc_keys_start, .next = proc_keys_next, .stop = proc_keys_stop, .show = proc_keys_show, }; static void *proc_key_users_start(struct seq_file *p, loff_t *_pos); static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos); static void proc_key_users_stop(struct seq_file *p, void *v); static int proc_key_users_show(struct seq_file *m, void *v); static const struct seq_operations proc_key_users_ops = { .start = proc_key_users_start, .next = proc_key_users_next, .stop = proc_key_users_stop, .show = proc_key_users_show, }; /* * Declare the /proc files. */ static int __init key_proc_init(void) { struct proc_dir_entry *p; p = proc_create_seq("keys", 0, NULL, &proc_keys_ops); if (!p) panic("Cannot create /proc/keys\n"); p = proc_create_seq("key-users", 0, NULL, &proc_key_users_ops); if (!p) panic("Cannot create /proc/key-users\n"); return 0; } __initcall(key_proc_init); /* * Implement "/proc/keys" to provide a list of the keys on the system that * grant View permission to the caller. 
*/ static struct rb_node *key_serial_next(struct seq_file *p, struct rb_node *n) { struct user_namespace *user_ns = seq_user_ns(p); n = rb_next(n); while (n) { struct key *key = rb_entry(n, struct key, serial_node); if (kuid_has_mapping(user_ns, key->user->uid)) break; n = rb_next(n); } return n; } static struct key *find_ge_key(struct seq_file *p, key_serial_t id) { struct user_namespace *user_ns = seq_user_ns(p); struct rb_node *n = key_serial_tree.rb_node; struct key *minkey = NULL; while (n) { struct key *key = rb_entry(n, struct key, serial_node); if (id < key->serial) { if (!minkey || minkey->serial > key->serial) minkey = key; n = n->rb_left; } else if (id > key->serial) { n = n->rb_right; } else { minkey = key; break; } key = NULL; } if (!minkey) return NULL; for (;;) { if (kuid_has_mapping(user_ns, minkey->user->uid)) return minkey; n = rb_next(&minkey->serial_node); if (!n) return NULL; minkey = rb_entry(n, struct key, serial_node); } } static void *proc_keys_start(struct seq_file *p, loff_t *_pos) __acquires(key_serial_lock) { key_serial_t pos = *_pos; struct key *key; spin_lock(&key_serial_lock); if (*_pos > INT_MAX) return NULL; key = find_ge_key(p, pos); if (!key) return NULL; *_pos = key->serial; return &key->serial_node; } static inline key_serial_t key_node_serial(struct rb_node *n) { struct key *key = rb_entry(n, struct key, serial_node); return key->serial; } static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos) { struct rb_node *n; n = key_serial_next(p, v); if (n) *_pos = key_node_serial(n); else (*_pos)++; return n; } static void proc_keys_stop(struct seq_file *p, void *v) __releases(key_serial_lock) { spin_unlock(&key_serial_lock); } static int proc_keys_show(struct seq_file *m, void *v) { struct rb_node *_p = v; struct key *key = rb_entry(_p, struct key, serial_node); unsigned long flags; key_ref_t key_ref, skey_ref; time64_t now, expiry; char xbuf[16]; short state; u64 timo; int rc; struct keyring_search_context ctx = { 
.index_key = key->index_key, .cred = m->file->f_cred, .match_data.cmp = lookup_user_key_possessed, .match_data.raw_data = key, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_RECURSE), }; key_ref = make_key_ref(key, 0); /* determine if the key is possessed by this process (a test we can * skip if the key does not indicate the possessor can view it */ if (key->perm & KEY_POS_VIEW) { rcu_read_lock(); skey_ref = search_cred_keyrings_rcu(&ctx); rcu_read_unlock(); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); } } /* check whether the current task is allowed to view the key */ rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW); if (rc < 0) return 0; now = ktime_get_real_seconds(); rcu_read_lock(); /* come up with a suitable timeout value */ expiry = READ_ONCE(key->expiry); if (expiry == 0) { memcpy(xbuf, "perm", 5); } else if (now >= expiry) { memcpy(xbuf, "expd", 5); } else { timo = expiry - now; if (timo < 60) sprintf(xbuf, "%llus", timo); else if (timo < 60*60) sprintf(xbuf, "%llum", div_u64(timo, 60)); else if (timo < 60*60*24) sprintf(xbuf, "%lluh", div_u64(timo, 60 * 60)); else if (timo < 60*60*24*7) sprintf(xbuf, "%llud", div_u64(timo, 60 * 60 * 24)); else sprintf(xbuf, "%lluw", div_u64(timo, 60 * 60 * 24 * 7)); } state = key_read_state(key); #define showflag(FLAGS, LETTER, FLAG) \ ((FLAGS & (1 << FLAG)) ? LETTER : '-') flags = READ_ONCE(key->flags); seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, state != KEY_IS_UNINSTANTIATED ? 'I' : '-', showflag(flags, 'R', KEY_FLAG_REVOKED), showflag(flags, 'D', KEY_FLAG_DEAD), showflag(flags, 'Q', KEY_FLAG_IN_QUOTA), showflag(flags, 'U', KEY_FLAG_USER_CONSTRUCT), state < 0 ? 
'N' : '-', showflag(flags, 'i', KEY_FLAG_INVALIDATED), refcount_read(&key->usage), xbuf, key->perm, from_kuid_munged(seq_user_ns(m), key->uid), from_kgid_munged(seq_user_ns(m), key->gid), key->type->name); #undef showflag if (key->type->describe) key->type->describe(key, m); seq_putc(m, '\n'); rcu_read_unlock(); return 0; } static struct rb_node *__key_user_next(struct user_namespace *user_ns, struct rb_node *n) { while (n) { struct key_user *user = rb_entry(n, struct key_user, node); if (kuid_has_mapping(user_ns, user->uid)) break; n = rb_next(n); } return n; } static struct rb_node *key_user_next(struct user_namespace *user_ns, struct rb_node *n) { return __key_user_next(user_ns, rb_next(n)); } static struct rb_node *key_user_first(struct user_namespace *user_ns, struct rb_root *r) { struct rb_node *n = rb_first(r); return __key_user_next(user_ns, n); } static void *proc_key_users_start(struct seq_file *p, loff_t *_pos) __acquires(key_user_lock) { struct rb_node *_p; loff_t pos = *_pos; spin_lock(&key_user_lock); _p = key_user_first(seq_user_ns(p), &key_user_tree); while (pos > 0 && _p) { pos--; _p = key_user_next(seq_user_ns(p), _p); } return _p; } static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos) { (*_pos)++; return key_user_next(seq_user_ns(p), (struct rb_node *)v); } static void proc_key_users_stop(struct seq_file *p, void *v) __releases(key_user_lock) { spin_unlock(&key_user_lock); } static int proc_key_users_show(struct seq_file *m, void *v) { struct rb_node *_p = v; struct key_user *user = rb_entry(_p, struct key_user, node); unsigned maxkeys = uid_eq(user->uid, GLOBAL_ROOT_UID) ? key_quota_root_maxkeys : key_quota_maxkeys; unsigned maxbytes = uid_eq(user->uid, GLOBAL_ROOT_UID) ? 
key_quota_root_maxbytes : key_quota_maxbytes; seq_printf(m, "%5u: %5d %d/%d %d/%d %d/%d\n", from_kuid_munged(seq_user_ns(m), user->uid), refcount_read(&user->usage), atomic_read(&user->nkeys), atomic_read(&user->nikeys), user->qnkeys, maxkeys, user->qnbytes, maxbytes); return 0; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #ifndef __QUOTA_DOT_H__ #define __QUOTA_DOT_H__ #include <linux/list_lru.h> struct gfs2_inode; struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID int gfs2_qa_get(struct gfs2_inode *ip); void gfs2_qa_put(struct gfs2_inode *ip); int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); void gfs2_quota_unhold(struct gfs2_inode *ip); int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); void gfs2_quota_unlock(struct gfs2_inode *ip); int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, struct gfs2_alloc_parms *ap); void gfs2_quota_change(struct gfs2_inode *ip, s64 change, kuid_t uid, kgid_t gid); int gfs2_quota_sync(struct super_block *sb, int type); int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); int gfs2_quota_init(struct gfs2_sbd *sdp); void gfs2_quota_cleanup(struct gfs2_sbd *sdp); int gfs2_quotad(void *data); void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int ret; ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ if (capable(CAP_SYS_RESOURCE) || sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) return 0; ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) gfs2_quota_unlock(ip); return ret; } extern const struct quotactl_ops gfs2_quotactl_ops; int __init 
gfs2_qd_shrinker_init(void); void gfs2_qd_shrinker_exit(void); extern struct list_lru gfs2_qd_lru; void __init gfs2_quota_hash_init(void); #endif /* __QUOTA_DOT_H__ */ |
829 14 301 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H #include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; extern int khugepaged_init(void); extern void khugepaged_destroy(void); extern int start_stop_khugepaged(void); extern void __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); extern bool current_is_khugepaged(void); #ifdef CONFIG_SHMEM extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd); #else static inline int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd) { return 0; } #endif static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags)) __khugepaged_enter(mm); } static inline void khugepaged_exit(struct mm_struct *mm) { if (test_bit(MMF_VM_HUGEPAGE, &mm->flags)) __khugepaged_exit(mm); } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { } static inline void khugepaged_exit(struct mm_struct *mm) { } static inline void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags) { } static inline int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd) { return 0; } static inline void khugepaged_min_free_kbytes_update(void) { } static inline bool current_is_khugepaged(void) { return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_KHUGEPAGED_H */ |
4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include "lru_cache.h" #include "messages.h" /* * Initialize a cache object. * * @cache: The cache. * @max_size: Maximum size (number of entries) for the cache. * Use 0 for unlimited size, it's the user's responsability to * trim the cache in that case. */ void btrfs_lru_cache_init(struct btrfs_lru_cache *cache, unsigned int max_size) { INIT_LIST_HEAD(&cache->lru_list); mt_init(&cache->entries); cache->size = 0; cache->max_size = max_size; } static struct btrfs_lru_cache_entry *match_entry(struct list_head *head, u64 key, u64 gen) { struct btrfs_lru_cache_entry *entry; list_for_each_entry(entry, head, list) { if (entry->key == key && entry->gen == gen) return entry; } return NULL; } /* * Lookup for an entry in the cache. * * @cache: The cache. * @key: The key of the entry we are looking for. * @gen: Generation associated to the key. * * Returns the entry associated with the key or NULL if none found. */ struct btrfs_lru_cache_entry *btrfs_lru_cache_lookup(struct btrfs_lru_cache *cache, u64 key, u64 gen) { struct list_head *head; struct btrfs_lru_cache_entry *entry; head = mtree_load(&cache->entries, key); if (!head) return NULL; entry = match_entry(head, key, gen); if (entry) list_move_tail(&entry->lru_list, &cache->lru_list); return entry; } /* * Remove an entry from the cache. * * @cache: The cache to remove from. 
* @entry: The entry to remove from the cache. * * Note: this also frees the memory used by the entry. */ void btrfs_lru_cache_remove(struct btrfs_lru_cache *cache, struct btrfs_lru_cache_entry *entry) { struct list_head *prev = entry->list.prev; ASSERT(cache->size > 0); ASSERT(!mtree_empty(&cache->entries)); list_del(&entry->list); list_del(&entry->lru_list); if (list_empty(prev)) { struct list_head *head; /* * If previous element in the list entry->list is now empty, it * means it's a head entry not pointing to any cached entries, * so remove it from the maple tree and free it. */ head = mtree_erase(&cache->entries, entry->key); ASSERT(head == prev); kfree(head); } kfree(entry); cache->size--; } /* * Store an entry in the cache. * * @cache: The cache. * @entry: The entry to store. * * Returns 0 on success and < 0 on error. */ int btrfs_lru_cache_store(struct btrfs_lru_cache *cache, struct btrfs_lru_cache_entry *new_entry, gfp_t gfp) { const u64 key = new_entry->key; struct list_head *head; int ret; head = kmalloc(sizeof(*head), gfp); if (!head) return -ENOMEM; ret = mtree_insert(&cache->entries, key, head, gfp); if (ret == 0) { INIT_LIST_HEAD(head); list_add_tail(&new_entry->list, head); } else if (ret == -EEXIST) { kfree(head); head = mtree_load(&cache->entries, key); ASSERT(head != NULL); if (match_entry(head, key, new_entry->gen) != NULL) return -EEXIST; list_add_tail(&new_entry->list, head); } else if (ret < 0) { kfree(head); return ret; } if (cache->max_size > 0 && cache->size == cache->max_size) { struct btrfs_lru_cache_entry *lru_entry; lru_entry = list_first_entry(&cache->lru_list, struct btrfs_lru_cache_entry, lru_list); btrfs_lru_cache_remove(cache, lru_entry); } list_add_tail(&new_entry->lru_list, &cache->lru_list); cache->size++; return 0; } /* * Empty a cache. * * @cache: The cache to empty. * * Removes all entries from the cache. 
*/ void btrfs_lru_cache_clear(struct btrfs_lru_cache *cache) { struct btrfs_lru_cache_entry *entry; struct btrfs_lru_cache_entry *tmp; list_for_each_entry_safe(entry, tmp, &cache->lru_list, lru_list) btrfs_lru_cache_remove(cache, entry); ASSERT(cache->size == 0); ASSERT(mtree_empty(&cache->entries)); } |
4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #ifndef __INODE_DOT_H__ #define __INODE_DOT_H__ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/mm.h> #include "util.h" bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask); ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, size_t size); void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { return !ip->i_height; } static inline int gfs2_is_jdata(const struct gfs2_inode *ip) { return ip->i_diskflags & GFS2_DIF_JDATA; } static inline bool gfs2_is_ordered(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_data == GFS2_DATA_ORDERED; } static inline bool gfs2_is_writeback(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK; } static inline int gfs2_is_dir(const struct gfs2_inode *ip) { return S_ISDIR(ip->i_inode.i_mode); } static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { inode->i_blocks = blocks << (inode->i_blkbits - 9); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { return inode->i_blocks >> (inode->i_blkbits - 9); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { change <<= inode->i_blkbits - 9; gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } static inline int gfs2_check_inum(const struct 
gfs2_inode *ip, u64 no_addr, u64 no_formal_ino) { return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino; } static inline void gfs2_inum_out(const struct gfs2_inode *ip, struct gfs2_dirent *dent) { dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); } static inline int gfs2_check_internal_file_size(struct inode *inode, u64 minsize, u64 maxsize) { u64 size = i_size_read(inode); if (size < minsize || size > maxsize) goto err; if (size & (BIT(inode->i_blkbits) - 1)) goto err; return 0; err: gfs2_consist_inode(GFS2_I(inode)); return -EIO; } struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, u64 no_addr, u64 no_formal_ino, unsigned int blktype); struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 no_formal_ino, unsigned int blktype); int gfs2_inode_refresh(struct gfs2_inode *ip); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); struct inode *gfs2_lookup_meta(struct inode *dip, const char *name); void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); int gfs2_open_common(struct inode *inode, struct file *file); loff_t gfs2_seek_data(struct file *file, loff_t offset); loff_t gfs2_seek_hole(struct file *file, loff_t offset); extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); int gfs2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); void gfs2_set_inode_flags(struct inode *inode); #ifdef CONFIG_GFS2_FS_LOCKING_DLM extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_localflocks; } #else /* Single node only */ #define gfs2_file_fops 
gfs2_file_fops_nolock #define gfs2_dir_fops gfs2_dir_fops_nolock static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return 1; } #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ #endif /* __INODE_DOT_H__ */ |
64 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2023 Western Digital Corporation or its affiliates. */ #ifndef BTRFS_RAID_STRIPE_TREE_H #define BTRFS_RAID_STRIPE_TREE_H #define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP | \ BTRFS_BLOCK_GROUP_RAID1_MASK | \ BTRFS_BLOCK_GROUP_RAID0 | \ BTRFS_BLOCK_GROUP_RAID10) struct btrfs_io_context; struct btrfs_io_stripe; struct btrfs_ordered_extent; struct btrfs_trans_handle; int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length); int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, u64 logical, u64 *length, u64 map_type, u32 stripe_index, struct btrfs_io_stripe *stripe); int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, struct btrfs_ordered_extent *ordered_extent); static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info, u64 map_type) { u64 type = map_type & BTRFS_BLOCK_GROUP_TYPE_MASK; u64 profile = map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK; if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE)) return false; if (type != BTRFS_BLOCK_GROUP_DATA) return false; if (profile & BTRFS_RST_SUPP_BLOCK_GROUP_MASK) return true; return false; } static inline int btrfs_num_raid_stripes(u32 item_size) { return (item_size - offsetof(struct btrfs_stripe_extent, strides)) / sizeof(struct btrfs_raid_stride); } #endif |
458 458 3 8 1 4 13 2 283 195 4 3 2 5 5 56 14 40 39 4 1 3 36 1 10 7 43 52 38 1 1 1 1 2 2 1 1 1 397 397 398 2 280 131 236 149 97 387 22 389 1 1 1 8 6 13 2 1 1 1 2 1 1 7 1 8 667 249 147 276 228 42 265 253 7 7 1 34 18 289 102 169 332 350 333 21 2 343 4 33 598 14 1 7 3 535 51 333 356 319 433 19 374 21 20 386 14 7 308 352 294 356 356 345 11 103 319 317 102 3 5 5 1 3 2 1 35 267 358 145 12 235 149 141 149 10 44 52 260 7 78 269 142 583 11 7 5 2 1 1 2 6 6 1 6 182 78 5 10 32 24 159 67 61 112 179 92 25 10 21 21 13 1 7 2 8 1 6 1 3 2 4 1 3 1 1 3 18 1 120 120 11 1 1 5 3 5 3 6 1 1 1 2 1 2 3 8 4 4 1 1 1 3 1 3 3 3 8 33 3 2 17 1 1 4 2 7 5 2 2 4 4 3 2 1 6 8 5 1 3 74 73 67 2 1 2 2 2 72 74 73 2 31 35 34 169 1 163 6 3 2 149 47 150 5 1 129 106 3 2 129 67 1 1 7 39 1 54 1 1 79 80 89 13 5 1 20 82 5 102 1 1 43 6 3 106 6 171 7 1 1 69 4 58 58 4 559 559 497 203 80 128 538 60 47 4 43 40 1 4 4 4 4 103 7 72 2 20 11 10 103 100 1 97 4 4 97 88 34 599 598 112 12 83 22 82 13 80 16 112 1 113 113 29 1 29 6 1 1 4 1 1 1 1 8 8 7 4 5 6 1 2 1 5 1 21 3 1 25 1 16 4 1 3 1 4 2 2 3 6 2 3 2 4 2 2 3 2 1 5 318 4 3 42 1 2 18 14 12 9 244 2 3 3 57 2 6 2 1 2 2 1 8 19 1 2 1 4 1 2 1 1 1 2 2 8 4 6 1 1 4 7 6 1 1 2 4 3 2 1 6 9 2 2 2 1 824 513 321 11 4 28 18 9 5 8 6 9 5 12 2 14 14 14 14 1 1 131 131 6 3 2 1 3 1 1 2 1 1 1 3 1 1 7 3 1 8 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 3 2 35 5 3 1 1 1 20 14 3 17 7 24 34 43 273 144 130 25 25 1 24 6 3 1 2 3 1 2 123 122 123 118 4 9 9 2 7 9 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 
1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 
1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 
1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 
2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 
2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 
3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 
3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 
3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 
4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol(TCP). * * Authors: Ross Biro * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche, <flla@stud.uni-sb.de> * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Jorge Cwik, <jorge@laser.satlink.net> * * Fixes: * Alan Cox : Numerous verify_area() calls * Alan Cox : Set the ACK bit on a reset * Alan Cox : Stopped it crashing if it closed while * sk->inuse=1 and was trying to connect * (tcp_err()). * Alan Cox : All icmp error handling was broken * pointers passed where wrong and the * socket was looked up backwards. Nobody * tested any icmp error code obviously. * Alan Cox : tcp_err() now handled properly. It * wakes people on errors. poll * behaves and the icmp error race * has gone by moving it into sock.c * Alan Cox : tcp_send_reset() fixed to work for * everything not just packets for * unknown sockets. * Alan Cox : tcp option processing. * Alan Cox : Reset tweaked (still not 100%) [Had * syn rule wrong] * Herp Rosmanith : More reset fixes * Alan Cox : No longer acks invalid rst frames. * Acking any kind of RST is right out. * Alan Cox : Sets an ignore me flag on an rst * receive otherwise odd bits of prattle * escape still * Alan Cox : Fixed another acking RST frame bug. * Should stop LAN workplace lockups. * Alan Cox : Some tidyups using the new skb list * facilities * Alan Cox : sk->keepopen now seems to work * Alan Cox : Pulls options out correctly on accepts * Alan Cox : Fixed assorted sk->rqueue->next errors * Alan Cox : PSH doesn't end a TCP read. Switched a * bit to skb ops. * Alan Cox : Tidied tcp_data to avoid a potential * nasty. * Alan Cox : Added some better commenting, as the * tcp is hard to follow * Alan Cox : Removed incorrect check for 20 * psh * Michael O'Reilly : ack < copied bug fix. 
* Johannes Stille : Misc tcp fixes (not all in yet). * Alan Cox : FIN with no memory -> CRASH * Alan Cox : Added socket option proto entries. * Also added awareness of them to accept. * Alan Cox : Added TCP options (SOL_TCP) * Alan Cox : Switched wakeup calls to callbacks, * so the kernel can layer network * sockets. * Alan Cox : Use ip_tos/ip_ttl settings. * Alan Cox : Handle FIN (more) properly (we hope). * Alan Cox : RST frames sent on unsynchronised * state ack error. * Alan Cox : Put in missing check for SYN bit. * Alan Cox : Added tcp_select_window() aka NET2E * window non shrink trick. * Alan Cox : Added a couple of small NET2E timer * fixes * Charles Hedrick : TCP fixes * Toomas Tamm : TCP window fixes * Alan Cox : Small URG fix to rlogin ^C ack fight * Charles Hedrick : Rewrote most of it to actually work * Linus : Rewrote tcp_read() and URG handling * completely * Gerhard Koerting: Fixed some missing timer handling * Matthew Dillon : Reworked TCP machine states as per RFC * Gerhard Koerting: PC/TCP workarounds * Adam Caldwell : Assorted timer/timing errors * Matthew Dillon : Fixed another RST bug * Alan Cox : Move to kernel side addressing changes. * Alan Cox : Beginning work on TCP fastpathing * (not yet usable) * Arnt Gulbrandsen: Turbocharged tcp_check() routine. * Alan Cox : TCP fast path debugging * Alan Cox : Window clamping * Michael Riepe : Bug in tcp_check() * Matt Dillon : More TCP improvements and RST bug fixes * Matt Dillon : Yet more small nasties remove from the * TCP code (Be very nice to this man if * tcp finally works 100%) 8) * Alan Cox : BSD accept semantics. * Alan Cox : Reset on closedown bug. * Peter De Schrijver : ENOTCONN check missing in tcp_sendto(). * Michael Pall : Handle poll() after URG properly in * all cases. * Michael Pall : Undo the last fix in tcp_read_urg() * (multi URG PUSH broke rlogin). * Michael Pall : Fix the multi URG PUSH problem in * tcp_readable(), poll() after URG * works now. 
* Michael Pall : recv(...,MSG_OOB) never blocks in the * BSD api. * Alan Cox : Changed the semantics of sk->socket to * fix a race and a signal problem with * accept() and async I/O. * Alan Cox : Relaxed the rules on tcp_sendto(). * Yury Shevchuk : Really fixed accept() blocking problem. * Craig I. Hagan : Allow for BSD compatible TIME_WAIT for * clients/servers which listen in on * fixed ports. * Alan Cox : Cleaned the above up and shrank it to * a sensible code size. * Alan Cox : Self connect lockup fix. * Alan Cox : No connect to multicast. * Ross Biro : Close unaccepted children on master * socket close. * Alan Cox : Reset tracing code. * Alan Cox : Spurious resets on shutdown. * Alan Cox : Giant 15 minute/60 second timer error * Alan Cox : Small whoops in polling before an * accept. * Alan Cox : Kept the state trace facility since * it's handy for debugging. * Alan Cox : More reset handler fixes. * Alan Cox : Started rewriting the code based on * the RFC's for other useful protocol * references see: Comer, KA9Q NOS, and * for a reference on the difference * between specifications and how BSD * works see the 4.4lite source. * A.N.Kuznetsov : Don't time wait on completion of tidy * close. * Linus Torvalds : Fin/Shutdown & copied_seq changes. * Linus Torvalds : Fixed BSD port reuse to work first syn * Alan Cox : Reimplemented timers as per the RFC * and using multiple timers for sanity. * Alan Cox : Small bug fixes, and a lot of new * comments. * Alan Cox : Fixed dual reader crash by locking * the buffers (much like datagram.c) * Alan Cox : Fixed stuck sockets in probe. A probe * now gets fed up of retrying without * (even a no space) answer. * Alan Cox : Extracted closing code better * Alan Cox : Fixed the closing state machine to * resemble the RFC. * Alan Cox : More 'per spec' fixes. * Jorge Cwik : Even faster checksumming. * Alan Cox : tcp_data() doesn't ack illegal PSH * only frames. At least one pc tcp stack * generates them. * Alan Cox : Cache last socket. 
* Alan Cox : Per route irtt. * Matt Day : poll()->select() match BSD precisely on error * Alan Cox : New buffers * Marc Tamsky : Various sk->prot->retransmits and * sk->retransmits misupdating fixed. * Fixed tcp_write_timeout: stuck close, * and TCP syn retries gets used now. * Mark Yarvis : In tcp_read_wakeup(), don't send an * ack if state is TCP_CLOSED. * Alan Cox : Look up device on a retransmit - routes may * change. Doesn't yet cope with MSS shrink right * but it's a start! * Marc Tamsky : Closing in closing fixes. * Mike Shaver : RFC1122 verifications. * Alan Cox : rcv_saddr errors. * Alan Cox : Block double connect(). * Alan Cox : Small hooks for enSKIP. * Alexey Kuznetsov: Path MTU discovery. * Alan Cox : Support soft errors. * Alan Cox : Fix MTU discovery pathological case * when the remote claims no mtu! * Marc Tamsky : TCP_CLOSE fix. * Colin (G3TNE) : Send a reset on syn ack replies in * window but wrong (fixes NT lpd problems) * Pedro Roque : Better TCP window handling, delayed ack. * Joerg Reuter : No modification of locked buffers in * tcp_do_retransmit() * Eric Schenk : Changed receiver side silly window * avoidance algorithm to BSD style * algorithm. This doubles throughput * against machines running Solaris, * and seems to result in general * improvement. * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD * Willy Konynenberg : Transparent proxying support. * Mike McLagan : Routing by source * Keith Owens : Do proper merging with partial SKB's in * tcp_do_sendmsg to avoid burstiness. * Eric Schenk : Fix fast close down bug with * shutdown() followed by close(). * Andi Kleen : Make poll agree with SIGIO * Salvatore Sanfilippo : Support SO_LINGER with linger == 1 and * lingertime == 0 (RFC 793 ABORT Call) * Hirokazu Takahashi : Use copy_from_user() instead of * csum_and_copy_from_user() if possible. 
* * Description of States: * * TCP_SYN_SENT sent a connection request, waiting for ack * * TCP_SYN_RECV received a connection request, sent ack, * waiting for final ack in three-way handshake. * * TCP_ESTABLISHED connection established * * TCP_FIN_WAIT1 our side has shutdown, waiting to complete * transmission of remaining buffered data * * TCP_FIN_WAIT2 all buffered data sent, waiting for remote * to shutdown * * TCP_CLOSING both sides have shutdown but we still have * data we have to finish sending * * TCP_TIME_WAIT timeout to catch resent junk before entering * closed, can only be entered from FIN_WAIT2 * or CLOSING. Required because the other end * may not have gotten our last ACK causing it * to retransmit the data packet (which we ignore) * * TCP_CLOSE_WAIT remote side has shutdown and is waiting for * us to finish writing our data and to shutdown * (we have to close() to move on to LAST_ACK) * * TCP_LAST_ACK out side has shutdown after remote has * shutdown. There may still be data in our * buffer that we have to finish sending * * TCP_CLOSE socket is finished */ #define pr_fmt(fmt) "TCP: " fmt #include <crypto/hash.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/poll.h> #include <linux/inet_diag.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/skbuff.h> #include <linux/scatterlist.h> #include <linux/splice.h> #include <linux/net.h> #include <linux/socket.h> #include <linux/random.h> #include <linux/memblock.h> #include <linux/highmem.h> #include <linux/cache.h> #include <linux/err.h> #include <linux/time.h> #include <linux/slab.h> #include <linux/errqueue.h> #include <linux/static_key.h> #include <linux/btf.h> #include <net/icmp.h> #include <net/inet_common.h> #include <net/tcp.h> #include <net/mptcp.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/sock.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/busy_poll.h> /* Track pending CMSGs. 
*/
enum {
	TCP_CMSG_INQ = 1,
	TCP_CMSG_TS = 2
};

/* Per-CPU counter, exported (GPL-only) for use outside this file.
 * NOTE(review): presumably counts orphaned TCP sockets - inferred from
 * the name only, confirm against its users.
 */
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);

/* Three-element array of TCP memory limits.
 * NOTE(review): presumably the backing store of the tcp_mem sysctl -
 * confirm against the sysctl table.
 */
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);

atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp;	/* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);

/* Per-CPU integer companion of tcp_memory_allocated.
 * NOTE(review): presumably a per-CPU forward-allocation reserve -
 * inferred from the name, confirm against its users.
 */
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);

/* Static key that defaults to false; only built when SMC is enabled. */
#if IS_ENABLED(CONFIG_SMC)
DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
EXPORT_SYMBOL(tcp_have_smc);
#endif

/*
 * Current number of TCP sockets.
 */
struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp;
EXPORT_SYMBOL(tcp_sockets_allocated);

/*
 * TCP splice context
 *
 * Filled in by tcp_splice_read() and handed to tcp_splice_data_recv()
 * through read_descriptor_t::arg.data.
 */
struct tcp_splice_state {
	struct pipe_inode_info *pipe;	/* pipe handed to skb_splice_bits() */
	size_t len;			/* bytes still to splice; decremented as data moves */
	unsigned int flags;		/* splice flags, forwarded to skb_splice_bits() */
};

/*
 * Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically.
 * All the __sk_mem_schedule() is of this nature: accounting
 * is strict, actions are advisory and have some latency.
*/ unsigned long tcp_memory_pressure __read_mostly; EXPORT_SYMBOL_GPL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; if (READ_ONCE(tcp_memory_pressure)) return; val = jiffies; if (!val) val--; if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); } EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); void tcp_leave_memory_pressure(struct sock *sk) { unsigned long val; if (!READ_ONCE(tcp_memory_pressure)) return; val = xchg(&tcp_memory_pressure, 0); if (val) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, jiffies_to_msecs(jiffies - val)); } EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) { u8 res = 0; if (seconds > 0) { int period = timeout; res = 1; while (seconds > period && res < 255) { res++; timeout <<= 1; if (timeout > rto_max) timeout = rto_max; period += timeout; } } return res; } /* Convert retransmits to seconds based on initial and max timeout */ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) { int period = 0; if (retrans > 0) { period = timeout; while (--retrans) { timeout <<= 1; if (timeout > rto_max) timeout = rto_max; period += timeout; } } return period; } static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp) { u32 rate = READ_ONCE(tp->rate_delivered); u32 intv = READ_ONCE(tp->rate_interval_us); u64 rate64 = 0; if (rate && intv) { rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; do_div(rate64, intv); } return rate64; } /* Address-family independent initialization for a tcp_sock. * * NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. 
*/
void tcp_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	/* Both rb-tree based queues start out empty. */
	tp->out_of_order_queue = RB_ROOT;
	sk->tcp_rtx_queue = RB_ROOT;
	tcp_init_xmit_timers(sk);
	INIT_LIST_HEAD(&tp->tsq_node);
	INIT_LIST_HEAD(&tp->tsorted_sent_queue);

	/* Seed the timeout/RTT state from the compile-time TCP_* defaults. */
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	icsk->icsk_rto_min = TCP_RTO_MIN;
	icsk->icsk_delack_max = TCP_DELACK_MAX;
	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);

	/* There's a bubble in the pipe until at least the first ACK. */
	tp->app_limited = ~0U;
	tp->rate_app_limited = 1;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	/* Per-netns sysctl, read locklessly. */
	tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
	tcp_assign_congestion_control(sk);

	tp->tsoffset = 0;
	tp->rack.reo_wnd_steps = 1;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_sync_mss = tcp_sync_mss;

	/* Initial buffer sizes come from element [1] of the per-netns
	 * tcp_wmem / tcp_rmem sysctl arrays.
	 */
	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
	tcp_scaling_ratio_init(sk);

	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
	sk_sockets_allocated_inc(sk);
}
EXPORT_SYMBOL(tcp_init_sock);

/* Apply TX timestamping flags to the skb at the tail of the write queue.
 * Does nothing when @tsflags is 0 or there is no tail skb.
 */
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
{
	struct sk_buff *skb = tcp_write_queue_tail(sk);

	if (tsflags && skb) {
		struct skb_shared_info *shinfo = skb_shinfo(skb);
		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

		sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
		if (tsflags & SOF_TIMESTAMPING_TX_ACK)
			tcb->txstamp_ack = 1;
		/* Key is seq + len - 1; presumably the sequence number of
		 * the last byte currently held by this skb.
		 */
		if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
	}
}

/* True when tcp_epollin_ready() reports the socket ready for @target;
 * otherwise the answer is delegated to sk_is_readable().
 */
static bool tcp_stream_is_readable(struct sock *sk, int target)
{
	if (tcp_epollin_ready(sk, target))
		return true;
	return sk_is_readable(sk);
}

/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 *
 *	Returns the mask of EPOLL* events currently pending on @sock.
 *	Listening sockets are handled entirely by inet_csk_listen_poll().
 */
__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;
	const struct tcp_sock *tp = tcp_sk(sk);
	u8 shutdown;
	int state;

	sock_poll_wait(file, sock, wait);

	state = inet_sk_state_load(sk);
	if (state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;

	/*
	 * EPOLLHUP is certainly not done right. But poll() doesn't
	 * have a notion of HUP in just one direction, and for a
	 * socket the read side is more interesting.
	 *
	 * Some poll() documentation says that EPOLLHUP is incompatible
	 * with the EPOLLOUT/POLLWR flags, so somebody should check this
	 * all. But careful, it tends to be safer to return too many
	 * bits than too few, and you can easily break real applications
	 * if you don't tell them that something has hung up!
	 *
	 * Check-me.
	 *
	 * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
	 * our fs/select.c). It means that after we received EOF,
	 * poll always returns immediately, making impossible poll() on write()
	 * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
	 * if and only if shutdown has been made in both directions.
	 * Actually, it is interesting to look how Solaris and DUX
	 * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
	 * then we could set it on SND_SHUTDOWN. BTW examples given
	 * in Stevens' books assume exactly this behaviour, it explains
	 * why EPOLLHUP is incompatible with EPOLLOUT.	--ANK
	 *
	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
	 * blocking on fresh not-connected or disconnected socket. --ANK
	 */
	shutdown = READ_ONCE(sk->sk_shutdown);
	if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected or passive Fast Open socket? */
	if (state != TCP_SYN_SENT &&
	    (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
		int target = sock_rcvlowat(sk, 0, INT_MAX);
		u16 urg_data = READ_ONCE(tp->urg_data);

		/* Urgent data sits exactly at the read position and is not
		 * delivered inline: require one more byte before reporting
		 * the socket readable.
		 */
		if (unlikely(urg_data) &&
		    READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
		    !sock_flag(sk, SOCK_URGINLINE))
			target++;

		if (tcp_stream_is_readable(sk, target))
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(shutdown & SEND_SHUTDOWN)) {
			if (__sk_stream_is_writeable(sk, 1)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost. Memory barrier
				 * pairs with the input side.
				 */
				smp_mb__after_atomic();
				if (__sk_stream_is_writeable(sk, 1))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		} else
			mask |= EPOLLOUT | EPOLLWRNORM;

		if (urg_data & TCP_URG_VALID)
			mask |= EPOLLPRI;
	} else if (state == TCP_SYN_SENT &&
		   inet_test_bit(DEFER_CONNECT, sk)) {
		/* Active TCP fastopen socket with defer_connect
		 * Return EPOLLOUT so application can call write()
		 * in order for kernel to generate SYN+data
		 */
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	/* This barrier is coupled with smp_wmb() in tcp_reset() */
	smp_rmb();
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR;

	return mask;
}
EXPORT_SYMBOL(tcp_poll);

/* TCP-specific ioctl handler.
 *
 * @sk:   socket being queried
 * @cmd:  SIOCINQ, SIOCATMARK, SIOCOUTQ or SIOCOUTQNSD
 * @karg: receives the answer on success
 *
 * Returns 0 on success, -EINVAL when a queue-size query is issued on a
 * listening socket, and -ENOIOCTLCMD for any other @cmd.
 */
int tcp_ioctl(struct sock *sk, int cmd, int *karg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int answ;
	bool slow;

	switch (cmd) {
	case SIOCINQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		/* tcp_inq() is evaluated with the socket locked. */
		slow = lock_sock_fast(sk);
		answ = tcp_inq(sk);
		unlock_sock_fast(sk, slow);
		break;
	case SIOCATMARK:
		/* Non-zero when urgent data is recorded and the urgent
		 * sequence equals the current read position.
		 */
		answ = READ_ONCE(tp->urg_data) &&
		       READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
		break;
	case SIOCOUTQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		/* Reported as 0 while in SYN_SENT / SYN_RECV; otherwise
		 * write_seq - snd_una.
		 */
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = READ_ONCE(tp->write_seq) - tp->snd_una;
		break;
	case SIOCOUTQNSD:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		/* Same as SIOCOUTQ but measured against snd_nxt:
		 * write_seq - snd_nxt.
		 */
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = READ_ONCE(tp->write_seq) -
			       READ_ONCE(tp->snd_nxt);
		break;
	default:
		return -ENOIOCTLCMD;
	}

	*karg = answ;
	return 0;
}
EXPORT_SYMBOL(tcp_ioctl);

/* Set PSH on @skb and remember the current write_seq as pushed_seq. */
void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
	tp->pushed_seq = tp->write_seq;
}

/* True once write_seq has advanced more than half of max_window past
 * the last pushed sequence.
 */
static inline bool forced_push(const struct tcp_sock *tp)
{
	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}

/* Append a fresh, still empty @skb to the tail of the write queue.
 *
 * Its sequence range is initialised to the empty range at write_seq,
 * its flags to ACK, and its truesize is charged to the socket.
 */
void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	tcb->seq     = tcb->end_seq = tp->write_seq;
	tcb->tcp_flags = TCPHDR_ACK;
	__skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk_wmem_queued_add(sk, skb->truesize);
	sk_mem_charge(sk, skb->truesize);
	/* A pending TCP_NAGLE_PUSH request is dropped once new data is queued. */
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;

	tcp_slow_start_after_idle_check(sk);
}

/* For MSG_OOB sends, move the urgent pointer (snd_up) to write_seq. */
static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
{
	if (flags & MSG_OOB)
		tp->snd_up = tp->write_seq;
}

/* If a not yet filled skb is pushed, do not send it if
 * we have data packets in Qdisc or NIC queues :
 * Because TX completion will happen shortly, it gives a chance
 * to coalesce future sendmsg() payload into this skb, without
 * need for a timer, and with no latency trade off.
 * As packets containing data payload have a bigger truesize
 * than pure acks (dataless) packets, the last checks prevent
 * autocorking if we only have an ACK in Qdisc/NIC queues,
 * or if TX completion was delayed after we processed ACK packet.
 */
static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
				int size_goal)
{
	return skb->len < size_goal &&
	       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
	       !tcp_rtx_queue_empty(sk) &&
	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
	       tcp_skb_can_collapse_to(skb);
}

/* Push out pending frames after data has been queued.
 *
 * @flags:     sendmsg flags; MSG_MORE and MSG_OOB are examined here
 * @mss_now:   current MSS, forwarded to __tcp_push_pending_frames()
 * @nonagle:   Nagle mode, replaced by TCP_NAGLE_CORK when MSG_MORE is set
 * @size_goal: size below which the tail skb is a candidate for autocorking
 *
 * Returns without transmitting when the write queue has no tail skb, or
 * when the tail skb is autocorked and data is still in flight.
 */
void tcp_push(struct sock *sk, int flags, int mss_now,
	      int nonagle, int size_goal)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	skb = tcp_write_queue_tail(sk);
	if (!skb)
		return;
	if (!(flags & MSG_MORE) || forced_push(tp))
		tcp_mark_push(tp, skb);

	tcp_mark_urg(tp, flags);

	if (tcp_should_autocork(sk, skb, size_goal)) {

		/* avoid atomic op if TSQ_THROTTLED bit is already set */
		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
		}
		/* It is possible TX completion already happened
		 * before we set TSQ_THROTTLED.
		 */
		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
			return;
	}

	if (flags & MSG_MORE)
		nonagle = TCP_NAGLE_CORK;

	__tcp_push_pending_frames(sk, mss_now, nonagle);
}

/* tcp_read_sock() callback used by __tcp_splice_read().
 *
 * Splices at most min(rd_desc->count, @len) bytes of @skb, starting at
 * @offset, into the pipe stored in the splice context. On success the
 * remaining budget in rd_desc->count is reduced by the amount spliced.
 * Returns whatever skb_splice_bits() returned.
 */
static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
				unsigned int offset, size_t len)
{
	struct tcp_splice_state *tss = rd_desc->arg.data;
	int ret;

	ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
			      min(rd_desc->count, len), tss->flags);
	if (ret > 0)
		rd_desc->count -= ret;
	return ret;
}

/* Run one tcp_read_sock() pass with a budget of tss->len bytes. */
static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
{
	/* Store TCP splice context information in read_descriptor_t. */
	read_descriptor_t rd_desc = {
		.arg.data = tss,
		.count	  = tss->len,
	};

	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
}

/**
 *  tcp_splice_read - splice data from TCP socket to a pipe
 * @sock:	socket to splice from
 * @ppos:	position (not valid)
 * @pipe:	pipe to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will read pages from given socket and fill them into a pipe.
 *
 * Return: the number of bytes spliced if any were moved; otherwise the
 *    last status seen (0 or a negative errno such as -ESPIPE, -ENOTCONN
 *    or -EAGAIN).
 *
 **/
ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct tcp_splice_state tss = {
		.pipe = pipe,
		.len = len,
		.flags = flags,
	};
	long timeo;
	ssize_t spliced;
	int ret;

	sock_rps_record_flow(sk);
	/*
	 * We can't seek on a socket input
	 */
	if (unlikely(*ppos))
		return -ESPIPE;

	ret = spliced = 0;

	lock_sock(sk);

	timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
	while (tss.len) {
		ret = __tcp_splice_read(sk, &tss);
		if (ret < 0)
			break;
		else if (!ret) {
			/* Nothing was spliced in this pass: decide whether
			 * to stop, fail, or wait for more data.
			 */
			if (spliced)
				break;
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				ret = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				/*
				 * This occurs when user tries to read
				 * from never connected socket.
				 */
				ret = -ENOTCONN;
				break;
			}
			if (!timeo) {
				ret = -EAGAIN;
				break;
			}
			/* if __tcp_splice_read() got nothing while we have
			 * an skb in receive queue, we do not want to loop.
			 * This might happen with URG data.
			 */
			if (!skb_queue_empty(&sk->sk_receive_queue))
				break;
			ret = sk_wait_data(sk, &timeo, NULL);
			if (ret < 0)
				break;
			if (signal_pending(current)) {
				ret = sock_intr_errno(timeo);
				break;
			}
			continue;
		}
		tss.len -= ret;
		spliced += ret;

		if (!tss.len || !timeo)
			break;
		/* Drop and immediately retake the socket lock between
		 * passes, then re-check the socket state.
		 */
		release_sock(sk);
		lock_sock(sk);

		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current))
			break;
	}

	release_sock(sk);

	if (spliced)
		return spliced;

	return ret;
}
EXPORT_SYMBOL(tcp_splice_read);

/* Allocate an skb for the TCP write path.
 *
 * @gfp:            allocation flags for alloc_skb_fclone()
 * @force_schedule: when true, memory is charged with
 *                  sk_forced_mem_schedule() and the charge is treated as
 *                  successful; otherwise sk_wmem_schedule() decides
 *
 * Returns the skb with MAX_TCP_HEADER bytes of headroom reserved, or NULL
 * when the allocation or the memory scheduling failed. On allocation
 * failure the protocol's enter_memory_pressure() hook is invoked and
 * sk_stream_moderate_sndbuf() is called.
 */
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
				     bool force_schedule)
{
	struct sk_buff *skb;

	skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp);
	if (likely(skb)) {
		bool mem_scheduled;

		skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
		if (force_schedule) {
			mem_scheduled = true;
			sk_forced_mem_schedule(sk, skb->truesize);
		} else {
			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
		}
		if (likely(mem_scheduled)) {
			skb_reserve(skb, MAX_TCP_HEADER);
			skb->ip_summed = CHECKSUM_PARTIAL;
			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
			return skb;
		}
		__kfree_skb(skb);
	} else {
		sk->sk_prot->enter_memory_pressure(sk);
		sk_stream_moderate_sndbuf(sk);
	}
	return NULL;
}

/* Compute the size goal for skbs built by sendmsg().
 *
 * Returns @mss_now when @large_allowed is 0; otherwise a multiple of
 * @mss_now derived from tp->gso_segs, never smaller than @mss_now.
 * tp->gso_segs is only recomputed (which costs a division) when the
 * cached value no longer matches the new bound.
 */
static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
				       int large_allowed)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 new_size_goal, size_goal;

	if (!large_allowed)
		return mss_now;

	/* Note : tcp_tso_autosize() will eventually split this later */
	new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);

	/* We try hard to avoid divides here */
	size_goal = tp->gso_segs * mss_now;
	if (unlikely(new_size_goal < size_goal ||
		     new_size_goal >= size_goal + mss_now)) {
		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
				     sk->sk_gso_max_segs);
		size_goal = tp->gso_segs * mss_now;
	}

	return max(size_goal, mss_now);
}

/* Return the current MSS and store the matching size goal in *@size_goal.
 * Large (multi-MSS) goals are disabled for MSG_OOB sends.
 */
int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
	int mss_now;

	mss_now = tcp_current_mss(sk);
	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));

	return mss_now;
}

/* In some cases, sendmsg() could have added an skb to the write queue,
 * but failed adding payload on it. We need to remove it to consume less
 * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger
 * epoll() users. Another reason is that tcp_write_xmit() does not like
 * finding an empty skb in the write queue.
 */
void tcp_remove_empty_skb(struct sock *sk)
{
	struct sk_buff *skb = tcp_write_queue_tail(sk);

	/* seq == end_seq means the tail skb carries no sequence space. */
	if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
		tcp_unlink_write_queue(skb, sk);
		if (tcp_write_queue_empty(sk))
			tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
		tcp_wmem_free_skb(sk, skb);
	}
}

/* skb changing from pure zc to mixed, must charge zc
 *
 * Returns 0 on success (including when @skb is not pure zerocopy) and
 * -ENOMEM when the extra truesize cannot be scheduled.
 */
static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
{
	if (unlikely(skb_zcopy_pure(skb))) {
		/* Portion of truesize beyond the base skb allocation. */
		u32 extra = skb->truesize -
			    SKB_TRUESIZE(skb_end_offset(skb));

		if (!sk_wmem_schedule(sk, extra))
			return -ENOMEM;

		sk_mem_charge(sk, extra);
		skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
	}
	return 0;
}

/* Try to schedule send memory for @copy bytes.
 *
 * Returns @copy when the full amount could be scheduled; otherwise the
 * smaller of @copy and sk->sk_forward_alloc after an optional forced
 * top-up bounded by tcp_wmem[0] - sk_wmem_queued.
 */
int tcp_wmem_schedule(struct sock *sk, int copy)
{
	int left;

	if (likely(sk_wmem_schedule(sk, copy)))
		return copy;

	/* We could be in trouble if we have nothing queued.
	 * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
	 * to guarantee some progress.
	 */
	left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
	if (left > 0)
		sk_forced_mem_schedule(sk, min(left, copy));
	return min(copy, sk->sk_forward_alloc);
}

/* Free the pending Fast Open request, if any, and clear the pointer. */
void tcp_free_fastopen_req(struct tcp_sock *tp)
{
	if (tp->fastopen_req) {
		kfree(tp->fastopen_req);
		tp->fastopen_req = NULL;
	}
}

int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
			 size_t size, struct ubuf_info *uarg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr *uaddr = msg->msg_name;
	int err, flags;

	if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
	      TFO_CLIENT_ENABLE) ||
	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
	     uaddr->sa_family == AF_UNSPEC))
		return -EOPNOTSUPP;
	if (tp->fastopen_req)
		return -EALREADY; /* Another Fast Open is in progress */

	tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
				   sk->sk_allocation);
	if (unlikely(!tp->fastopen_req))
		return -ENOBUFS;
	tp->fastopen_req->data = msg;
	tp->fastopen_req->size = size;
	tp->fastopen_req->uarg = uarg;

	if (inet_test_bit(DEFER_CONNECT, sk)) {
		err = tcp_connect(sk);
		/* Same failure procedure as in tcp_v4/6_connect */
		if (err) {
			tcp_set_state(sk, TCP_CLOSE);
			inet->inet_dport = 0;
			sk->sk_route_caps = 0;
		}
	}
	flags = (msg->msg_flags & MSG_DONTWAIT) ?
O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags, 1); /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst */ if (tp->fastopen_req) { *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); inet_clear_bit(DEFER_CONNECT, sk); } return err; } int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct ubuf_info *uarg = NULL; struct sk_buff *skb; struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; int process_backlog = 0; int zc = 0; long timeo; flags = msg->msg_flags; if ((flags & MSG_ZEROCOPY) && size) { if (msg->msg_ubuf) { uarg = msg->msg_ubuf; if (sk->sk_route_caps & NETIF_F_SG) zc = MSG_ZEROCOPY; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { skb = tcp_write_queue_tail(sk); uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb)); if (!uarg) { err = -ENOBUFS; goto out_err; } if (sk->sk_route_caps & NETIF_F_SG) zc = MSG_ZEROCOPY; else uarg_to_msgzc(uarg)->zerocopy = 0; } } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) { if (sk->sk_route_caps & NETIF_F_SG) zc = MSG_SPLICE_PAGES; } if (unlikely(flags & MSG_FASTOPEN || inet_test_bit(DEFER_CONNECT, sk)) && !tp->repair) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg); if (err == -EINPROGRESS && copied_syn > 0) goto out; else if (err) goto out_err; } timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); tcp_rate_check_app_limited(sk); /* is sending application-limited? */ /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. 
*/
	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
	    !tcp_passive_fastopen(sk)) {
		err = sk_stream_wait_connect(sk, &timeo);
		if (err != 0)
			goto do_error;
	}

	/* Repair mode: data for the receive queue is injected directly and
	 * returns without pushing; TCP_NO_QUEUE is rejected with -EINVAL.
	 */
	if (unlikely(tp->repair)) {
		if (tp->repair_queue == TCP_RECV_QUEUE) {
			copied = tcp_send_rcvq(sk, msg, size);
			goto out_nopush;
		}

		err = -EINVAL;
		if (tp->repair_queue == TCP_NO_QUEUE)
			goto out_err;

		/* 'common' sending to sendq */
	}

	sockcm_init(&sockc, sk);
	if (msg->msg_controllen) {
		err = sock_cmsg_send(sk, msg, &sockc);
		if (unlikely(err)) {
			/* Any control-message failure is reported as -EINVAL. */
			err = -EINVAL;
			goto out_err;
		}
	}

	/* This should be in poll */
	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	/* Ok commence sending. */
	copied = 0;

restart:
	mss_now = tcp_send_mss(sk, &size_goal, flags);

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	while (msg_data_left(msg)) {
		/* Room left in the tail skb before it reaches size_goal. */
		ssize_t copy = 0;

		skb = tcp_write_queue_tail(sk);
		if (skb)
			copy = size_goal - skb->len;

		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
			bool first_skb;

new_segment:
			if (!sk_stream_memory_free(sk))
				goto wait_for_space;

			/* Every 16 allocated skbs, give the backlog a chance;
			 * if anything was flushed, recompute the MSS.
			 */
			if (unlikely(process_backlog >= 16)) {
				process_backlog = 0;
				if (sk_flush_backlog(sk))
					goto restart;
			}
			first_skb = tcp_rtx_and_write_queues_empty(sk);
			skb = tcp_stream_alloc_skb(sk, sk->sk_allocation,
						   first_skb);
			if (!skb)
				goto wait_for_space;

			process_backlog++;

			tcp_skb_entail(sk, skb);
			copy = size_goal;

			/* All packets are restored as if they have
			 * already been sent. skb_mstamp_ns isn't set to
			 * avoid wrong rtt estimation.
			 */
			if (tp->repair)
				TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
		}

		/* Try to append data to the end of skb. */
		if (copy > msg_data_left(msg))
			copy = msg_data_left(msg);

		if (zc == 0) {
			/* Copy path: payload is copied into the socket's page
			 * frag and attached to the skb as a (possibly merged)
			 * fragment.
			 */
			bool merge = true;
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			if (!sk_page_frag_refill(sk, pfrag))
				goto wait_for_space;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				if (i >= READ_ONCE(sysctl_max_skb_frags)) {
					tcp_mark_push(tp, skb);
					goto new_segment;
				}
				merge = false;
			}

			copy = min_t(int, copy, pfrag->size - pfrag->offset);

			if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
				if (tcp_downgrade_zcopy_pure(sk, skb))
					goto wait_for_space;
				skb_zcopy_downgrade_managed(skb);
			}

			copy = tcp_wmem_schedule(sk, copy);
			if (!copy)
				goto wait_for_space;

			err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
						       pfrag->page,
						       pfrag->offset,
						       copy);
			if (err)
				goto do_error;

			/* Update the skb. */
			if (merge) {
				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			} else {
				skb_fill_page_desc(skb, i, pfrag->page,
						   pfrag->offset, copy);
				page_ref_inc(pfrag->page);
			}
			pfrag->offset += copy;
		} else if (zc == MSG_ZEROCOPY) {
			/* First append to a fragless skb builds initial
			 * pure zerocopy skb
			 */
			if (!skb->len)
				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;

			/* Pure zerocopy skbs skip the wmem scheduling here. */
			if (!skb_zcopy_pure(skb)) {
				copy = tcp_wmem_schedule(sk, copy);
				if (!copy)
					goto wait_for_space;
			}

			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
			if (err == -EMSGSIZE || err == -EEXIST) {
				tcp_mark_push(tp, skb);
				goto new_segment;
			}
			if (err < 0)
				goto do_error;
			/* A non-negative result is the byte count appended. */
			copy = err;
		} else if (zc == MSG_SPLICE_PAGES) {
			/* Splice in data if we can; copy if we can't. */
			if (tcp_downgrade_zcopy_pure(sk, skb))
				goto wait_for_space;
			copy = tcp_wmem_schedule(sk, copy);
			if (!copy)
				goto wait_for_space;

			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
						   sk->sk_allocation);
			if (err < 0) {
				if (err == -EMSGSIZE) {
					tcp_mark_push(tp, skb);
					goto new_segment;
				}
				goto do_error;
			}
			/* A non-negative result is the byte count spliced. */
			copy = err;

			if (!(flags & MSG_NO_SHARED_FRAGS))
				skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;

			sk_wmem_queued_add(sk, copy);
			sk_mem_charge(sk, copy);
		}

		/* First bytes of this call: clear PSH on the skb. */
		if (!copied)
			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;

		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
		TCP_SKB_CB(skb)->end_seq += copy;
		tcp_skb_pcount_set(skb, 0);

		copied += copy;
		if (!msg_data_left(msg)) {
			if (unlikely(flags & MSG_EOR))
				TCP_SKB_CB(skb)->eor = 1;
			goto out;
		}

		/* Keep filling the current skb while it is below the goal. */
		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
			continue;

		if (forced_push(tp)) {
			tcp_mark_push(tp, skb);
			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
		} else if (skb == tcp_send_head(sk))
			tcp_push_one(sk, mss_now);
		continue;

wait_for_space:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		tcp_remove_empty_skb(sk);
		/* Push what was queued so far before sleeping for memory. */
		if (copied)
			tcp_push(sk, flags & ~MSG_MORE, mss_now,
				 TCP_NAGLE_PUSH, size_goal);

		err = sk_stream_wait_memory(sk, &timeo);
		if (err != 0)
			goto do_error;

		mss_now = tcp_send_mss(sk, &size_goal, flags);
	}

out:
	if (copied) {
		tcp_tx_timestamp(sk, sockc.tsflags);
		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
	}
out_nopush:
	/* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
	if (uarg && !msg->msg_ubuf)
		net_zcopy_put(uarg);
	return copied + copied_syn;

do_error:
	tcp_remove_empty_skb(sk);

	/* Partial progress is reported as success; the error is dropped. */
	if (copied + copied_syn)
		goto out;
out_err:
	/* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
	if (uarg && !msg->msg_ubuf)
		net_zcopy_put_abort(uarg, true);
	err = sk_stream_error(sk, flags, err);
	/* make sure we wake any epoll edge trigger waiter */
	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
		sk->sk_write_space(sk);
		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
	}
	return err;
}
EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);

/* Locking wrapper: takes the socket lock around tcp_sendmsg_locked() and
 * returns its result.
 */
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	int ret;

	lock_sock(sk);
	ret = tcp_sendmsg_locked(sk, msg, size);
	release_sock(sk);

	return ret;
}
EXPORT_SYMBOL(tcp_sendmsg);

/* Push whatever is pending on the write queue, using the socket's own
 * nonagle setting.  Does nothing when the write queue has no tail skb.
 */
void tcp_splice_eof(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tcp_sock *tp = tcp_sk(sk);
	int mss_now, size_goal;

	/* Checked before the socket lock is taken. */
	if (!tcp_write_queue_tail(sk))
		return;

	lock_sock(sk);
	mss_now = tcp_send_mss(sk, &size_goal, 0);
	tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
	release_sock(sk);
}
EXPORT_SYMBOL_GPL(tcp_splice_eof);

/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this, no blocking and very strange errors 8)
 *
 *	Returns 1 (or 0 when @len is not positive, with MSG_TRUNC set) once
 *	the urgent byte is valid, -EFAULT if copying it out fails, -EINVAL
 *	when there is no urgent byte to read, -ENOTCONN on a closed socket
 *	that is not done, 0 on a closed or receive-shutdown socket, and
 *	-EAGAIN otherwise.
 */

static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* No URG data to read. */
	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
	    tp->urg_data == TCP_URG_READ)
		return -EINVAL;	/* Yes this is right ! */

	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
		return -ENOTCONN;

	if (tp->urg_data & TCP_URG_VALID) {
		int err = 0;
		/* Truncating to char keeps only the low byte of urg_data. */
		char c = tp->urg_data;

		/* Unless peeking, mark the urgent byte as consumed. */
		if (!(flags & MSG_PEEK))
			WRITE_ONCE(tp->urg_data, TCP_URG_READ);

		/* Read urgent data. */
		msg->msg_flags |= MSG_OOB;

		if (len > 0) {
			if (!(flags & MSG_TRUNC))
				err = memcpy_to_msg(msg, &c, 1);
			len = 1;
		} else
			msg->msg_flags |= MSG_TRUNC;

		return err ? -EFAULT : len;
	}

	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
		return 0;

	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
* Mike <pall@rz.uni-karlsruhe.de> */ return -EAGAIN; } static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) { struct sk_buff *skb; int copied = 0, err = 0; /* XXX -- need to support SO_PEEK_OFF */ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { err = skb_copy_datagram_msg(skb, 0, msg, skb->len); if (err) return err; copied += skb->len; } skb_queue_walk(&sk->sk_write_queue, skb) { err = skb_copy_datagram_msg(skb, 0, msg, skb->len); if (err) break; copied += skb->len; } return err ?: copied; } /* Clean up the receive buffer for full frames taken by the user, * then send an ACK if necessary. COPIED is the number of bytes * tcp_recvmsg has given to the user so far, it speeds up the * calculation of whether or not we must ACK for the sake of * a window update. */ void __tcp_cleanup_rbuf(struct sock *sk, int copied) { struct tcp_sock *tp = tcp_sk(sk); bool time_to_ack = false; if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); if (/* Once-per-two-segments ACK was not sent by tcp_input.c */ tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || /* * If this read emptied read buffer, we send ACK, if * connection is not bidirectional, user drained * receive buffer and there was a small segment * in queue. */ (copied > 0 && ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !inet_csk_in_pingpong_mode(sk))) && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = true; } /* We send an ACK if we can now advertise a non-zero window * which has been raised "significantly". * * Even if window raised up to infinity, do not send window open ACK * in states, where we will not receive more. It is useless. */ if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { __u32 rcv_window_now = tcp_receive_window(tp); /* Optimize, __tcp_select_window() is not cheap. 
*/ if (2*rcv_window_now <= tp->window_clamp) { __u32 new_window = __tcp_select_window(sk); /* Send ACK now, if this read freed lots of space * in our buffer. Certainly, new_window is new window. * We can advertise it now, if it is not less than current one. * "Lots" means "at least twice" here. */ if (new_window && new_window >= 2 * rcv_window_now) time_to_ack = true; } } if (time_to_ack) tcp_send_ack(sk); } void tcp_cleanup_rbuf(struct sock *sk, int copied) { struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); struct tcp_sock *tp = tcp_sk(sk); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); __tcp_cleanup_rbuf(sk, copied); } static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); if (likely(skb->destructor == sock_rfree)) { sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; return skb_attempt_defer_free(skb); } __kfree_skb(skb); } struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; u32 offset; while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { offset = seq - TCP_SKB_CB(skb)->seq; if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { pr_err_once("%s: found a SYN, please report !\n", __func__); offset--; } if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { *off = offset; return skb; } /* This looks weird, but this can happen if TCP collapsing * splitted a fat GRO packet, while we released socket lock * in skb_splice_bits() */ tcp_eat_recv_skb(sk, skb); } return NULL; } EXPORT_SYMBOL(tcp_recv_skb); /* * This routine provides an alternative to tcp_recvmsg() for routines * that would like to handle copying from skbuffs directly in 'sendfile' * fashion. * Note: * - It is assumed that the socket was locked by the caller. * - The routine does not block. 
* - At present, there is no support for reading OOB data * or for 'peeking' the socket using this routine * (although both would be easy to implement). */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor) { struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); u32 seq = tp->copied_seq; u32 offset; int copied = 0; if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { if (offset < skb->len) { int used; size_t len; len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - seq; if (urg_offset < len) len = urg_offset; if (!len) break; } used = recv_actor(desc, skb, offset, len); if (used <= 0) { if (!copied) copied = used; break; } if (WARN_ON_ONCE(used > len)) used = len; seq += used; copied += used; offset += used; /* If recv_actor drops the lock (e.g. TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it * while aggregating skbs from the socket queue. */ skb = tcp_recv_skb(sk, seq - 1, &offset); if (!skb) break; /* TCP coalescing might have appended data to the skb. * Try to splice more frags */ if (offset + 1 != skb->len) continue; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { tcp_eat_recv_skb(sk, skb); ++seq; break; } tcp_eat_recv_skb(sk, skb); if (!desc->count) break; WRITE_ONCE(tp->copied_seq, seq); } WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. 
*/ if (copied > 0) { tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); } return copied; } EXPORT_SYMBOL(tcp_read_sock); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct sk_buff *skb; int copied = 0; if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { u8 tcp_flags; int used; __skb_unlink(skb, &sk->sk_receive_queue); WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); tcp_flags = TCP_SKB_CB(skb)->tcp_flags; used = recv_actor(sk, skb); if (used < 0) { if (!copied) copied = used; break; } copied += used; if (tcp_flags & TCPHDR_FIN) break; } return copied; } EXPORT_SYMBOL(tcp_read_skb); void tcp_read_done(struct sock *sk, size_t len) { struct tcp_sock *tp = tcp_sk(sk); u32 seq = tp->copied_seq; struct sk_buff *skb; size_t left; u32 offset; if (sk->sk_state == TCP_LISTEN) return; left = len; while (left && (skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { int used; used = min_t(size_t, skb->len - offset, left); seq += used; left -= used; if (skb->len > offset + used) break; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { tcp_eat_recv_skb(sk, skb); ++seq; break; } tcp_eat_recv_skb(sk, skb); } WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ if (left != len) tcp_cleanup_rbuf(sk, len - left); } EXPORT_SYMBOL(tcp_read_done); int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); } EXPORT_SYMBOL(tcp_peek_len); /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ int tcp_set_rcvlowat(struct sock *sk, int val) { int space, cap; if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? 
: 1); /* Check if we need to signal EPOLLIN right now */ tcp_data_ready(sk); if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) return 0; space = tcp_space_from_win(sk, val); if (space > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, space); tcp_sk(sk)->window_clamp = val; } return 0; } EXPORT_SYMBOL(tcp_set_rcvlowat); void tcp_update_recv_tstamps(struct sk_buff *skb, struct scm_timestamping_internal *tss) { if (skb->tstamp) tss->ts[0] = ktime_to_timespec64(skb->tstamp); else tss->ts[0] = (struct timespec64) {0}; if (skb_hwtstamps(skb)->hwtstamp) tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); else tss->ts[2] = (struct timespec64) {0}; } #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { }; int tcp_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { if (vma->vm_flags & (VM_WRITE | VM_EXEC)) return -EPERM; vm_flags_clear(vma, VM_MAYWRITE | VM_MAYEXEC); /* Instruct vm_insert_page() to not mmap_read_lock(mm) */ vm_flags_set(vma, VM_MIXEDMAP); vma->vm_ops = &tcp_vm_ops; return 0; } EXPORT_SYMBOL(tcp_mmap); static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, u32 *offset_frag) { skb_frag_t *frag; if (unlikely(offset_skb >= skb->len)) return NULL; offset_skb -= skb_headlen(skb); if ((int)offset_skb < 0 || skb_has_frag_list(skb)) return NULL; frag = skb_shinfo(skb)->frags; while (offset_skb) { if (skb_frag_size(frag) > offset_skb) { *offset_frag = offset_skb; return frag; } offset_skb -= skb_frag_size(frag); ++frag; } *offset_frag = 0; return frag; } static bool can_map_frag(const skb_frag_t *frag) { return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); } static int find_next_mappable_frag(const skb_frag_t *frag, int remaining_in_skb) { int offset = 0; if (likely(can_map_frag(frag))) return 0; while (offset < remaining_in_skb && !can_map_frag(frag)) { offset += skb_frag_size(frag); ++frag; } return offset; } static void tcp_zerocopy_set_hint_for_skb(struct sock *sk, struct 
tcp_zerocopy_receive *zc, struct sk_buff *skb, u32 offset) { u32 frag_offset, partial_frag_remainder = 0; int mappable_offset; skb_frag_t *frag; /* worst case: skip to next skb. try to improve on this case below */ zc->recv_skip_hint = skb->len - offset; /* Find the frag containing this offset (and how far into that frag) */ frag = skb_advance_to_frag(skb, offset, &frag_offset); if (!frag) return; if (frag_offset) { struct skb_shared_info *info = skb_shinfo(skb); /* We read part of the last frag, must recvmsg() rest of skb. */ if (frag == &info->frags[info->nr_frags - 1]) return; /* Else, we must at least read the remainder in this frag. */ partial_frag_remainder = skb_frag_size(frag) - frag_offset; zc->recv_skip_hint -= partial_frag_remainder; ++frag; } /* partial_frag_remainder: If part way through a frag, must read rest. * mappable_offset: Bytes till next mappable frag, *not* counting bytes * in partial_frag_remainder. */ mappable_offset = find_next_mappable_frag(frag, zc->recv_skip_hint); zc->recv_skip_hint = mappable_offset + partial_frag_remainder; } static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, int flags, struct scm_timestamping_internal *tss, int *cmsg_flags); static int receive_fallback_to_copy(struct sock *sk, struct tcp_zerocopy_receive *zc, int inq, struct scm_timestamping_internal *tss) { unsigned long copy_address = (unsigned long)zc->copybuf_address; struct msghdr msg = {}; struct iovec iov; int err; zc->length = 0; zc->recv_skip_hint = 0; if (copy_address != zc->copybuf_address) return -EINVAL; err = import_single_range(ITER_DEST, (void __user *)copy_address, inq, &iov, &msg.msg_iter); if (err) return err; err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT, tss, &zc->msg_flags); if (err < 0) return err; zc->copybuf_len = err; if (likely(zc->copybuf_len)) { struct sk_buff *skb; u32 offset; skb = tcp_recv_skb(sk, tcp_sk(sk)->copied_seq, &offset); if (skb) tcp_zerocopy_set_hint_for_skb(sk, zc, skb, offset); } return 
0; } static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, struct sk_buff *skb, u32 copylen, u32 *offset, u32 *seq) { unsigned long copy_address = (unsigned long)zc->copybuf_address; struct msghdr msg = {}; struct iovec iov; int err; if (copy_address != zc->copybuf_address) return -EINVAL; err = import_single_range(ITER_DEST, (void __user *)copy_address, copylen, &iov, &msg.msg_iter); if (err) return err; err = skb_copy_datagram_msg(skb, *offset, &msg, copylen); if (err) return err; zc->recv_skip_hint -= copylen; *offset += copylen; *seq += copylen; return (__s32)copylen; } static int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc, struct sock *sk, struct sk_buff *skb, u32 *seq, s32 copybuf_len, struct scm_timestamping_internal *tss) { u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); if (!copylen) return 0; /* skb is null if inq < PAGE_SIZE. */ if (skb) { offset = *seq - TCP_SKB_CB(skb)->seq; } else { skb = tcp_recv_skb(sk, *seq, &offset); if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, tss); zc->msg_flags |= TCP_CMSG_TS; } } zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, seq); return zc->copybuf_len < 0 ? 0 : copylen; } static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma, struct page **pending_pages, unsigned long pages_remaining, unsigned long *address, u32 *length, u32 *seq, struct tcp_zerocopy_receive *zc, u32 total_bytes_to_map, int err) { /* At least one page did not map. Try zapping if we skipped earlier. */ if (err == -EBUSY && zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT) { u32 maybe_zap_len; maybe_zap_len = total_bytes_to_map - /* All bytes to map */ *length + /* Mapped or pending */ (pages_remaining * PAGE_SIZE); /* Failed map. */ zap_page_range_single(vma, *address, maybe_zap_len, NULL); err = 0; } if (!err) { unsigned long leftover_pages = pages_remaining; int bytes_mapped; /* We called zap_page_range_single, try to reinsert. 
*/ err = vm_insert_pages(vma, *address, pending_pages, &pages_remaining); bytes_mapped = PAGE_SIZE * (leftover_pages - pages_remaining); *seq += bytes_mapped; *address += bytes_mapped; } if (err) { /* Either we were unable to zap, OR we zapped, retried an * insert, and still had an issue. Either ways, pages_remaining * is the number of pages we were unable to map, and we unroll * some state we speculatively touched before. */ const int bytes_not_mapped = PAGE_SIZE * pages_remaining; *length -= bytes_not_mapped; zc->recv_skip_hint += bytes_not_mapped; } return err; } static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, struct page **pages, unsigned int pages_to_map, unsigned long *address, u32 *length, u32 *seq, struct tcp_zerocopy_receive *zc, u32 total_bytes_to_map) { unsigned long pages_remaining = pages_to_map; unsigned int pages_mapped; unsigned int bytes_mapped; int err; err = vm_insert_pages(vma, *address, pages, &pages_remaining); pages_mapped = pages_to_map - (unsigned int)pages_remaining; bytes_mapped = PAGE_SIZE * pages_mapped; /* Even if vm_insert_pages fails, it may have partially succeeded in * mapping (some but not all of the pages). */ *seq += bytes_mapped; *address += bytes_mapped; if (likely(!err)) return 0; /* Error: maybe zap and retry + rollback state for failed inserts. */ return tcp_zerocopy_vm_insert_batch_error(vma, pages + pages_mapped, pages_remaining, address, length, seq, zc, total_bytes_to_map, err); } #define TCP_VALID_ZC_MSG_FLAGS (TCP_CMSG_TS) static void tcp_zc_finalize_rx_tstamp(struct sock *sk, struct tcp_zerocopy_receive *zc, struct scm_timestamping_internal *tss) { unsigned long msg_control_addr; struct msghdr cmsg_dummy; msg_control_addr = (unsigned long)zc->msg_control; cmsg_dummy.msg_control_user = (void __user *)msg_control_addr; cmsg_dummy.msg_controllen = (__kernel_size_t)zc->msg_controllen; cmsg_dummy.msg_flags = in_compat_syscall() ? 
MSG_CMSG_COMPAT : 0; cmsg_dummy.msg_control_is_user = true; zc->msg_flags = 0; if (zc->msg_control == msg_control_addr && zc->msg_controllen == cmsg_dummy.msg_controllen) { tcp_recv_timestamp(&cmsg_dummy, sk, tss); zc->msg_control = (__u64) ((uintptr_t)cmsg_dummy.msg_control_user); zc->msg_controllen = (__u64)cmsg_dummy.msg_controllen; zc->msg_flags = (__u32)cmsg_dummy.msg_flags; } } static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm, unsigned long address, bool *mmap_locked) { struct vm_area_struct *vma = lock_vma_under_rcu(mm, address); if (vma) { if (vma->vm_ops != &tcp_vm_ops) { vma_end_read(vma); return NULL; } *mmap_locked = false; return vma; } mmap_read_lock(mm); vma = vma_lookup(mm, address); if (!vma || vma->vm_ops != &tcp_vm_ops) { mmap_read_unlock(mm); return NULL; } *mmap_locked = true; return vma; } #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc, struct scm_timestamping_internal *tss) { u32 length = 0, offset, vma_len, avail_len, copylen = 0; unsigned long address = (unsigned long)zc->address; struct page *pages[TCP_ZEROCOPY_PAGE_BATCH_SIZE]; s32 copybuf_len = zc->copybuf_len; struct tcp_sock *tp = tcp_sk(sk); const skb_frag_t *frags = NULL; unsigned int pages_to_map = 0; struct vm_area_struct *vma; struct sk_buff *skb = NULL; u32 seq = tp->copied_seq; u32 total_bytes_to_map; int inq = tcp_inq(sk); bool mmap_locked; int ret; zc->copybuf_len = 0; zc->msg_flags = 0; if (address & (PAGE_SIZE - 1) || address != zc->address) return -EINVAL; if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; sock_rps_record_flow(sk); if (inq && inq <= copybuf_len) return receive_fallback_to_copy(sk, zc, inq, tss); if (inq < PAGE_SIZE) { zc->length = 0; zc->recv_skip_hint = inq; if (!inq && sock_flag(sk, SOCK_DONE)) return -EIO; return 0; } vma = find_tcp_vma(current->mm, address, &mmap_locked); if (!vma) return -EINVAL; vma_len = min_t(unsigned long, zc->length, vma->vm_end - address); 
avail_len = min_t(u32, vma_len, inq);
	/* Only whole pages can be mapped; round the budget down. */
	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
	if (total_bytes_to_map) {
		/* Without the TLB-clean hint, zap the target range up front. */
		if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
			zap_page_range_single(vma, address, total_bytes_to_map,
					      NULL);
		zc->length = total_bytes_to_map;
		zc->recv_skip_hint = 0;
	} else {
		zc->length = avail_len;
		zc->recv_skip_hint = avail_len;
	}
	ret = 0;
	while (length + PAGE_SIZE <= zc->length) {
		int mappable_offset;
		struct page *page;

		/* Less than a page left in the current skb: move on to the
		 * next one (or fetch the first one).
		 */
		if (zc->recv_skip_hint < PAGE_SIZE) {
			u32 offset_frag;

			if (skb) {
				if (zc->recv_skip_hint > 0)
					break;
				skb = skb->next;
				offset = seq - TCP_SKB_CB(skb)->seq;
			} else {
				skb = tcp_recv_skb(sk, seq, &offset);
			}

			if (TCP_SKB_CB(skb)->has_rxtstamp) {
				tcp_update_recv_tstamps(skb, tss);
				zc->msg_flags |= TCP_CMSG_TS;
			}
			zc->recv_skip_hint = skb->len - offset;
			frags = skb_advance_to_frag(skb, offset, &offset_frag);
			/* Stop if there is no frag at this offset or the read
			 * position is in the middle of one.
			 */
			if (!frags || offset_frag)
				break;
		}

		mappable_offset = find_next_mappable_frag(frags,
							  zc->recv_skip_hint);
		if (mappable_offset) {
			zc->recv_skip_hint = mappable_offset;
			break;
		}
		page = skb_frag_page(frags);
		prefetchw(page);
		pages[pages_to_map++] = page;
		length += PAGE_SIZE;
		zc->recv_skip_hint -= PAGE_SIZE;
		frags++;
		if (pages_to_map == TCP_ZEROCOPY_PAGE_BATCH_SIZE ||
		    zc->recv_skip_hint < PAGE_SIZE) {
			/* Either full batch, or we're about to go to next skb
			 * (and we cannot unroll failed ops across skbs).
			 */
			ret = tcp_zerocopy_vm_insert_batch(vma, pages,
							   pages_to_map,
							   &address, &length,
							   &seq, zc,
							   total_bytes_to_map);
			if (ret)
				goto out;
			pages_to_map = 0;
		}
	}
	/* Flush a final, partially filled batch. */
	if (pages_to_map) {
		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pages_to_map,
						   &address, &length, &seq,
						   zc, total_bytes_to_map);
	}
out:
	/* Release whichever lock find_tcp_vma() left held. */
	if (mmap_locked)
		mmap_read_unlock(current->mm);
	else
		vma_end_read(vma);
	/* Try to copy straggler data. */
	if (!ret)
		copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);

	if (length + copylen) {
		WRITE_ONCE(tp->copied_seq, seq);
		tcp_rcv_space_adjust(sk);

		/* Clean up data we have read: This will do ACK frames. */
		tcp_recv_skb(sk, seq, &offset);
		tcp_cleanup_rbuf(sk, length + copylen);
		/* Progress was made, so any earlier error is dropped. */
		ret = 0;
		if (length == zc->length)
			zc->recv_skip_hint = 0;
	} else {
		if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE))
			ret = -EIO;
	}
	zc->length = length;
	return ret;
}
#endif

/* Similar to __sock_recv_timestamp, but does not require an skb.
 *
 * tss->ts[0] (if non-zero) is emitted as an SO_TIMESTAMP* control message
 * in the format selected by the socket flags.  ts[0] and ts[2] are cleared
 * unless the matching SOF_TIMESTAMPING_* flag is set in sk->sk_tsflags; if
 * either one is kept, ts[1] is zeroed and the whole @tss is emitted as an
 * scm_timestamping message.
 */
void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
			struct scm_timestamping_internal *tss)
{
	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
	bool has_timestamping = false;

	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
				/* Nanosecond resolution, new or old layout. */
				if (new_tstamp) {
					struct __kernel_timespec kts = {
						.tv_sec = tss->ts[0].tv_sec,
						.tv_nsec = tss->ts[0].tv_nsec,
					};
					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
						 sizeof(kts), &kts);
				} else {
					struct __kernel_old_timespec ts_old = {
						.tv_sec = tss->ts[0].tv_sec,
						.tv_nsec = tss->ts[0].tv_nsec,
					};
					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
						 sizeof(ts_old), &ts_old);
				}
			} else {
				/* Microsecond resolution, new or old layout. */
				if (new_tstamp) {
					struct __kernel_sock_timeval stv = {
						.tv_sec = tss->ts[0].tv_sec,
						.tv_usec = tss->ts[0].tv_nsec / 1000,
					};
					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
						 sizeof(stv), &stv);
				} else {
					struct __kernel_old_timeval tv = {
						.tv_sec = tss->ts[0].tv_sec,
						.tv_usec = tss->ts[0].tv_nsec / 1000,
					};
					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
						 sizeof(tv), &tv);
				}
			}
		}

		if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
			has_timestamping = true;
		else
			tss->ts[0] = (struct timespec64) {0};
	}

	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
		if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
			has_timestamping = true;
		else
			tss->ts[2] = (struct timespec64) {0};
	}

	if (has_timestamping) {
		tss->ts[1] = (struct timespec64) {0};
		if (sock_flag(sk, SOCK_TSTAMP_NEW))
			put_cmsg_scm_timestamping64(msg, tss);
		else
			put_cmsg_scm_timestamping(msg, tss);
	}
}

/* Return the number of bytes between copied_seq and rcv_nxt.
 *
 * The two values are first sampled without the socket lock; if the result
 * is negative or copied_seq changed in the meantime, the computation is
 * repeated under lock_sock().
 */
static int tcp_inq_hint(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 copied_seq = READ_ONCE(tp->copied_seq);
	u32 rcv_nxt = READ_ONCE(tp->rcv_nxt);
	int inq;

	inq = rcv_nxt - copied_seq;
	if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) {
		lock_sock(sk);
		inq = tp->rcv_nxt - tp->copied_seq;
		release_sock(sk);
	}
	/* After receiving a FIN, tell the user-space to continue reading
	 * by returning a non-zero inq.
	 */
	if (inq == 0 && sock_flag(sk, SOCK_DONE))
		inq = 1;
	return inq;
}

/*
 *	This routine copies from a sock struct into the user buffer.
 *
 *	Technical note: in 2.3 we work on _locked_ socket, so that
 *	tricks with *seq access order and skb->users are not required.
 *	Probably, code can be easily improved even more.
 */

static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
			      int flags, struct scm_timestamping_internal *tss,
			      int *cmsg_flags)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int copied = 0;
	u32 peek_seq;
	u32 *seq;
	unsigned long used;
	int err;
	int target;		/* Read at least this many bytes */
	long timeo;
	struct sk_buff *skb, *last;
	u32 urg_hole = 0;

	err = -ENOTCONN;
	if (sk->sk_state == TCP_LISTEN)
		goto out;

	if (tp->recvmsg_inq) {
		*cmsg_flags = TCP_CMSG_INQ;
		msg->msg_get_inq = 1;
	}
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Urgent data needs to be handled specially. */
	if (flags & MSG_OOB)
		goto recv_urg;

	if (unlikely(tp->repair)) {
		err = -EPERM;
		if (!(flags & MSG_PEEK))
			goto out;

		if (tp->repair_queue == TCP_SEND_QUEUE)
			goto recv_sndq;

		err = -EINVAL;
		if (tp->repair_queue == TCP_NO_QUEUE)
			goto out;

		/* 'common' recv queue MSG_PEEK-ing */
	}

	seq = &tp->copied_seq;
	if (flags & MSG_PEEK) {
		peek_seq = tp->copied_seq;
		seq = &peek_seq;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	do {
		u32 offset;

		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
		if (unlikely(tp->urg_data) && tp->urg_seq == *seq) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
				break;
			}
		}

		/* Next get a buffer.
*/ last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { last = skb; /* Now that we have two receive queues this * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags)) break; offset = *seq - TCP_SKB_CB(skb)->seq; if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { pr_err_once("%s: found a SYN, please report !\n", __func__); offset--; } if (offset < skb->len) goto found_ok_skb; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; WARN(!(flags & MSG_PEEK), "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } /* Well, if we have backlog, try to process it now yet. */ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { if (!timeo || sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_state == TCP_CLOSE) { /* This occurs when user tries to read * from never connected socket. */ copied = -ENOTCONN; break; } if (!timeo) { copied = -EAGAIN; break; } if (signal_pending(current)) { copied = sock_intr_errno(timeo); break; } } if (copied >= target) { /* Do not sleep, just process backlog. */ __sk_flush_backlog(sk); } else { tcp_cleanup_rbuf(sk, copied); err = sk_wait_data(sk, &timeo, last); if (err < 0) { err = copied ? : err; goto out; } } if ((flags & MSG_PEEK) && (peek_seq - copied - urg_hole != tp->copied_seq)) { net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", current->comm, task_pid_nr(current)); peek_seq = tp->copied_seq; } continue; found_ok_skb: /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) used = len; /* Do we have urgent data here? 
*/ if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { WRITE_ONCE(*seq, *seq + 1); urg_hole++; offset++; used--; if (!used) goto skip_copy; } } else used = urg_offset; } } if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_msg(skb, offset, msg, used); if (err) { /* Exception. Bailout! */ if (!copied) copied = -EFAULT; break; } } WRITE_ONCE(*seq, *seq + used); copied += used; len -= used; tcp_rcv_space_adjust(sk); skip_copy: if (unlikely(tp->urg_data) && after(tp->copied_seq, tp->urg_seq)) { WRITE_ONCE(tp->urg_data, 0); tcp_fast_path_check(sk); } if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, tss); *cmsg_flags |= TCP_CMSG_TS; } if (used + offset < skb->len) continue; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) tcp_eat_recv_skb(sk, skb); continue; found_fin_ok: /* Process the FIN. */ WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) tcp_eat_recv_skb(sk, skb); break; } while (len > 0); /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ /* Clean up data we have read: This will do ACK frames. 
*/ tcp_cleanup_rbuf(sk, copied); return copied; out: return err; recv_urg: err = tcp_recv_urg(sk, msg, len, flags); goto out; recv_sndq: err = tcp_peek_sndq(sk, msg, len); goto out; } int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { int cmsg_flags = 0, ret; struct scm_timestamping_internal tss; if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && sk->sk_state == TCP_ESTABLISHED) sk_busy_loop(sk, flags & MSG_DONTWAIT); lock_sock(sk); ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags); release_sock(sk); if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); if (msg->msg_get_inq) { msg->msg_inq = tcp_inq_hint(sk); if (cmsg_flags & TCP_CMSG_INQ) put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(msg->msg_inq), &msg->msg_inq); } } return ret; } EXPORT_SYMBOL(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; /* We defined a new enum for TCP states that are exported in BPF * so as not force the internal TCP states to be frozen. The * following checks will detect if an internal state value ever * differs from the BPF value. If this ever happens, then we will * need to remap the internal value to the BPF value before calling * tcp_call_bpf_2arg. 
*/ BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED); BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT); BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV); BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1); BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2); BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT); BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE); BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT); BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK); BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN); BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING); BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV); BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES); /* bpf uapi header bpf.h defines an anonymous enum with values * BPF_TCP_* used by bpf programs. Currently gcc built vmlinux * is able to emit this enum in DWARF due to the above BUILD_BUG_ON. * But clang built vmlinux does not have this enum in DWARF * since clang removes the above code before generating IR/debuginfo. * Let us explicitly emit the type debuginfo to ensure the * above-mentioned anonymous enum in the vmlinux DWARF and hence BTF * regardless of which compiler is used. 
*/ BTF_TYPE_EMIT_ENUM(BPF_TCP_ESTABLISHED); if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG)) tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state); switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED) TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); break; case TCP_CLOSE: if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); fallthrough; default: if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ inet_sk_state_store(sk, state); } EXPORT_SYMBOL_GPL(tcp_set_state); /* * State processing on a close. This implements the state shift for * sending our FIN frame. Note that we only send a FIN for some * states. A shutdown() may have already sent the FIN, or we may be * closed. */ static const unsigned char new_state[16] = { /* current state: new state: action: */ [0 /* (Invalid) */] = TCP_CLOSE, [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, [TCP_SYN_SENT] = TCP_CLOSE, [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, [TCP_TIME_WAIT] = TCP_CLOSE, [TCP_CLOSE] = TCP_CLOSE, [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, [TCP_LAST_ACK] = TCP_LAST_ACK, [TCP_LISTEN] = TCP_CLOSE, [TCP_CLOSING] = TCP_CLOSING, [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ }; static int tcp_close_state(struct sock *sk) { int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; tcp_set_state(sk, ns); return next & TCP_ACTION_FIN; } /* * Shutdown the sending side of a connection. Much like close except * that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD). 
*/

void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
EXPORT_SYMBOL(tcp_shutdown);

/* Sum the per-cpu tcp_orphan_count counters; the result is clamped at 0. */
int tcp_orphan_count_sum(void)
{
	int i, total = 0;

	for_each_possible_cpu(i)
		total += per_cpu(tcp_orphan_count, i);

	return max(total, 0);
}

/* Cached orphan total, refreshed by tcp_orphan_update(). */
static int tcp_orphan_cache;
static struct timer_list tcp_orphan_timer;
#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100)

/* Timer callback: refresh tcp_orphan_cache and re-arm the timer. */
static void tcp_orphan_update(struct timer_list *unused)
{
	WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum());
	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
}

/* True when the cached orphan count, shifted left by @shift, exceeds
 * sysctl_tcp_max_orphans.
 */
static bool tcp_too_many_orphans(int shift)
{
	return READ_ONCE(tcp_orphan_cache) << shift >
		READ_ONCE(sysctl_tcp_max_orphans);
}

/* Return true when there are too many orphans or tcp_out_of_memory()
 * reports pressure for @sk; each condition logs a rate-limited message.
 */
bool tcp_check_oom(struct sock *sk, int shift)
{
	bool too_many_orphans, out_of_socket_memory;

	too_many_orphans = tcp_too_many_orphans(shift);
	out_of_socket_memory = tcp_out_of_memory(sk);

	if (too_many_orphans)
		net_info_ratelimited("too many orphaned sockets\n");
	if (out_of_socket_memory)
		net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
	return too_many_orphans || out_of_socket_memory;
}

/* Close @sk. tcp_close() calls this between lock_sock() and release_sock().
 *
 * Unread receive data is dropped and counted; depending on the socket's
 * condition the connection is disconnected, reset, or a FIN is sent. The
 * socket is then orphaned and, if it ends in TCP_CLOSE, destroyed.
 */
void __tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

		/* A FIN occupies one sequence number but carries no data. */
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			len--;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (unlikely(tcp_sk(sk)->repair)) {
		sk->sk_prot->disconnect(sk, 0);
	} else if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
		 * in a single packet! (May consider it later but will
		 * probably need API support or TCP_CORK SYN-ACK until
		 * data is written and socket is closed.)
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before the backlog is processed below. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	local_bh_disable();
	bh_lock_sock(sk);
	/* remove backlog if any, without releasing ownership. */
	__release_sock(sk);

	this_cpu_inc(tcp_orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 1 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (READ_ONCE(tp->linger2) < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		if (tcp_check_oom(sk, 0)) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPABORTONMEMORY);
		} else if (!check_net(sock_net(sk))) {
			/* Not possible to send reset; just close */
			tcp_set_state(sk, TCP_CLOSE);
		}
	}

	if (sk->sk_state == TCP_CLOSE) {
		struct request_sock *req;

		req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
						lockdep_sock_is_held(sk));
		/* We could get here with a non-NULL req if the socket is
		 * aborted (e.g., closed with unread data) before 3WHS
		 * finishes.
		 */
		if (req)
			reqsk_fastopen_remove(sk, req, false);
		inet_csk_destroy_sock(sk);
	}
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
}

/* Locked wrapper around __tcp_close(); ends with a sock_put() on @sk. */
void tcp_close(struct sock *sk, long timeout)
{
	lock_sock(sk);
	__tcp_close(sk, timeout);
	release_sock(sk);
	sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);

/* These states need RST on ABORT according to RFC793 */

static inline bool tcp_need_reset(int state)
{
	return (1 << state) &
	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}

/* Unlink and free every skb in the retransmit rbtree of @sk. */
static void tcp_rtx_queue_purge(struct sock *sk)
{
	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);

	tcp_sk(sk)->highest_sack = NULL;
	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		/* Fetch the successor before the current node is unlinked. */
		p = rb_next(p);
		/* Since we are deleting whole queue, no need to
		 * list_del(&skb->tcp_tsorted_anchor)
		 */
		tcp_rtx_queue_unlink(skb, sk);
		tcp_wmem_free_skb(sk, skb);
	}
}

/* Free the write queue and the retransmit queue of @sk, then reset the
 * tsorted list, retransmit hints, packets_out and icsk_backoff.
 */
void tcp_write_queue_purge(struct sock *sk)
{
	struct sk_buff *skb;

	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		tcp_skb_tsorted_anchor_cleanup(skb);
		tcp_wmem_free_skb(sk, skb);
	}
	tcp_rtx_queue_purge(sk);
	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
	tcp_clear_all_retrans_hints(tcp_sk(sk));
	tcp_sk(sk)->packets_out = 0;
	inet_csk(sk)->icsk_backoff = 0;
}

/* Abort the connection on @sk and reset the socket's TCP state.
 *
 * Moves the socket to TCP_CLOSE, sends a reset where tcp_need_reset()
 * (or the CLOSING/LAST_ACK adjustment) requires one, purges all queues
 * and clears per-connection fields. Always returns 0; @flags is unused
 * in this function.
 */
int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int old_state = sk->sk_state;
	u32 seq;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* ABORT function of RFC793 */
	if (old_state == TCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (unlikely(tp->repair)) {
		WRITE_ONCE(sk->sk_err, ECONNABORTED);
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
		tcp_send_active_reset(sk, gfp_any());
		WRITE_ONCE(sk->sk_err, ECONNRESET);
	} else if (old_state == TCP_SYN_SENT)
		WRITE_ONCE(sk->sk_err, ECONNRESET);

	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
	WRITE_ONCE(tp->urg_data, 0);
	tcp_write_queue_purge(sk);
	tcp_fastopen_active_disable_ofo_check(sk);
	skb_rbtree_purge(&tp->out_of_order_queue);

	inet->inet_dport = 0;

	inet_bhash2_reset_saddr(sk);

	WRITE_ONCE(sk->sk_shutdown, 0);
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt_us = 0;
	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
	tp->rcv_rtt_last_tsecr = 0;

	/* Advance write_seq, never leaving it at 0. */
	seq = tp->write_seq + tp->max_window + 2;
	if (!seq)
		seq = 1;
	WRITE_ONCE(tp->write_seq, seq);

	icsk->icsk_backoff = 0;
	icsk->icsk_probes_out = 0;
	icsk->icsk_probes_tstamp = 0;
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	icsk->icsk_rto_min = TCP_RTO_MIN;
	icsk->icsk_delack_max = TCP_DELACK_MAX;
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
	tp->snd_cwnd_cnt = 0;
	tp->is_cwnd_limited = 0;
	tp->max_packets_out = 0;
	tp->window_clamp = 0;
	tp->delivered = 0;
	tp->delivered_ce = 0;
	if (icsk->icsk_ca_ops->release)
		icsk->icsk_ca_ops->release(sk);
	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
	icsk->icsk_ca_initialized = 0;
	tcp_set_ca_state(sk, TCP_CA_Open);
	tp->is_sack_reneg = 0;
	tcp_clear_retrans(tp);
	tp->total_retrans = 0;
	inet_csk_delack_init(sk);
	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
	 * issue in __tcp_select_window()
	 */
	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
	__sk_dst_reset(sk);
	dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
	tcp_saved_syn_free(tp);
	tp->compressed_ack = 0;
	tp->segs_in = 0;
	tp->segs_out = 0;
	tp->bytes_sent = 0;
	tp->bytes_acked = 0;
	tp->bytes_received = 0;
	tp->bytes_retrans = 0;
	tp->data_segs_in = 0;
	tp->data_segs_out = 0;
	tp->duplicate_sack[0].start_seq = 0;
	tp->duplicate_sack[0].end_seq = 0;
	tp->dsack_dups = 0;
	tp->reord_seen = 0;
	tp->retrans_out = 0;
	tp->sacked_out = 0;
	tp->tlp_high_seq = 0;
	tp->last_oow_ack_time = 0;
	tp->plb_rehash = 0;
	/* There's a bubble in the pipe until at least the first ACK. */
	tp->app_limited = ~0U;
	tp->rate_app_limited = 1;
	tp->rack.mstamp = 0;
	tp->rack.advanced = 0;
	tp->rack.reo_wnd_steps = 1;
	tp->rack.last_delivered = 0;
	tp->rack.reo_wnd_persist = 0;
	tp->rack.dsack_seen = 0;
	tp->syn_data_acked = 0;
	tp->rx_opt.saw_tstamp = 0;
	tp->rx_opt.dsack = 0;
	tp->rx_opt.num_sacks = 0;
	tp->rcv_ooopack = 0;


	/* Clean up fastopen related fields */
	tcp_free_fastopen_req(tp);
	inet_clear_bit(DEFER_CONNECT, sk);
	tp->fastopen_client_fail = 0;

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
		sk->sk_frag.offset = 0;
	}
	sk_error_report(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_disconnect);

/* Repair mode may be used when the caller has CAP_NET_ADMIN in the
 * socket's user namespace and the socket is not in TCP_LISTEN.
 */
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
	return sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
		(sk->sk_state != TCP_LISTEN);
}

/* Install window parameters supplied through TCP_REPAIR_WINDOW.
 *
 * Returns -EPERM outside repair mode, -EINVAL for a wrong length or
 * inconsistent values, -EFAULT when the copy from @optbuf fails, else 0.
 */
static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
{
	struct tcp_repair_window opt;

	if (!tp->repair)
		return -EPERM;

	if (len != sizeof(opt))
		return -EINVAL;

	if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
		return -EFAULT;

	if (opt.max_window < opt.snd_wnd)
		return -EINVAL;

	if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
		return -EINVAL;

	if (after(opt.rcv_wup, tp->rcv_nxt))
		return -EINVAL;

	tp->snd_wl1	= opt.snd_wl1;
	tp->snd_wnd	= opt.snd_wnd;
	tp->max_window	= opt.max_window;

	tp->rcv_wnd	= opt.rcv_wnd;
	tp->rcv_wup	= opt.rcv_wup;

	return 0;
}

/* Apply an array of struct tcp_repair_opt entries from @optbuf.
 *
 * Entries are consumed while at least sizeof(opt) bytes remain; a shorter
 * trailing remainder is ignored. Unknown opt_code values are skipped.
 * Options applied before a failing entry stay applied.
 */
static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
		unsigned int len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_repair_opt opt;
	size_t offset = 0;

	while (len >= sizeof(opt)) {
		if (copy_from_sockptr_offset(&opt, optbuf, offset, sizeof(opt)))
			return -EFAULT;

		offset += sizeof(opt);
		len -= sizeof(opt);

		switch (opt.opt_code) {
		case TCPOPT_MSS:
			tp->rx_opt.mss_clamp = opt.opt_val;
			tcp_mtup_init(sk);
			break;
		case TCPOPT_WINDOW:
			{
				/* Low 16 bits: send scale; high 16 bits: receive scale. */
				u16 snd_wscale = opt.opt_val & 0xFFFF;
				u16 rcv_wscale = opt.opt_val >> 16;

				if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
					return -EFBIG;

				tp->rx_opt.snd_wscale = snd_wscale;
				tp->rx_opt.rcv_wscale = rcv_wscale;
				tp->rx_opt.wscale_ok = 1;
			}
			break;
		case TCPOPT_SACK_PERM:
			if (opt.opt_val != 0)
				return -EINVAL;

			tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
			break;
		case TCPOPT_TIMESTAMP:
			if (opt.opt_val != 0)
				return -EINVAL;

			tp->rx_opt.tstamp_ok = 1;
			break;
		}
	}

	return 0;
}

DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
EXPORT_SYMBOL(tcp_tx_delay_enabled);

/* Enable the tcp_tx_delay_enabled static key; the cmpxchg() ensures the
 * enable and the log message happen only once.
 */
static void tcp_enable_tx_delay(void)
{
	if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
		static int __tcp_tx_delay_enabled = 0;

		if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
			static_branch_enable(&tcp_tx_delay_enabled);
			pr_info("TCP_TX_DELAY enabled\n");
		}
	}
}

/* When set indicates to always queue non-full frames.  Later the user clears
 * this option and we transmit any pending partial frames in the queue.  This is
 * meant to be used alongside sendfile() to get properly filled frames when the
 * user (for example) must write out headers with a write() call first and then
 * use sendfile to send out the data parts.
 *
 * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
 * TCP_NODELAY.
 */
void __tcp_sock_set_cork(struct sock *sk, bool on)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (on) {
		tp->nonagle |= TCP_NAGLE_CORK;
	} else {
		tp->nonagle &= ~TCP_NAGLE_CORK;
		if (tp->nonagle & TCP_NAGLE_OFF)
			tp->nonagle |= TCP_NAGLE_PUSH;
		tcp_push_pending_frames(sk);
	}
}

/* Locked wrapper around __tcp_sock_set_cork(). */
void tcp_sock_set_cork(struct sock *sk, bool on)
{
	lock_sock(sk);
	__tcp_sock_set_cork(sk, on);
	release_sock(sk);
}
EXPORT_SYMBOL(tcp_sock_set_cork);

/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
 * remembered, but it is not activated until cork is cleared.
* * However, when TCP_NODELAY is set we make an explicit push, which overrides * even TCP_CORK for currently queued segments. */ void __tcp_sock_set_nodelay(struct sock *sk, bool on) { if (on) { tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; tcp_push_pending_frames(sk); } else { tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF; } } void tcp_sock_set_nodelay(struct sock *sk) { lock_sock(sk); __tcp_sock_set_nodelay(sk, true); release_sock(sk); } EXPORT_SYMBOL(tcp_sock_set_nodelay); static void __tcp_sock_set_quickack(struct sock *sk, int val) { if (!val) { inet_csk_enter_pingpong_mode(sk); return; } inet_csk_exit_pingpong_mode(sk); if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED; tcp_cleanup_rbuf(sk, 1); if (!(val & 1)) inet_csk_enter_pingpong_mode(sk); } } void tcp_sock_set_quickack(struct sock *sk, int val) { lock_sock(sk); __tcp_sock_set_quickack(sk, val); release_sock(sk); } EXPORT_SYMBOL(tcp_sock_set_quickack); int tcp_sock_set_syncnt(struct sock *sk, int val) { if (val < 1 || val > MAX_TCP_SYNCNT) return -EINVAL; WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val); return 0; } EXPORT_SYMBOL(tcp_sock_set_syncnt); int tcp_sock_set_user_timeout(struct sock *sk, int val) { /* Cap the max time in ms TCP will retry or probe the window * before giving up and aborting (ETIMEDOUT) a connection. 
*/ if (val < 0) return -EINVAL; WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val); return 0; } EXPORT_SYMBOL(tcp_sock_set_user_timeout); int tcp_sock_set_keepidle_locked(struct sock *sk, int val) { struct tcp_sock *tp = tcp_sk(sk); if (val < 1 || val > MAX_TCP_KEEPIDLE) return -EINVAL; /* Paired with WRITE_ONCE() in keepalive_time_when() */ WRITE_ONCE(tp->keepalive_time, val * HZ); if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { u32 elapsed = keepalive_time_elapsed(tp); if (tp->keepalive_time > elapsed) elapsed = tp->keepalive_time - elapsed; else elapsed = 0; inet_csk_reset_keepalive_timer(sk, elapsed); } return 0; } int tcp_sock_set_keepidle(struct sock *sk, int val) { int err; lock_sock(sk); err = tcp_sock_set_keepidle_locked(sk, val); release_sock(sk); return err; } EXPORT_SYMBOL(tcp_sock_set_keepidle); int tcp_sock_set_keepintvl(struct sock *sk, int val) { if (val < 1 || val > MAX_TCP_KEEPINTVL) return -EINVAL; WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); return 0; } EXPORT_SYMBOL(tcp_sock_set_keepintvl); int tcp_sock_set_keepcnt(struct sock *sk, int val) { if (val < 1 || val > MAX_TCP_KEEPCNT) return -EINVAL; /* Paired with READ_ONCE() in keepalive_probes() */ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); return 0; } EXPORT_SYMBOL(tcp_sock_set_keepcnt); int tcp_set_window_clamp(struct sock *sk, int val) { struct tcp_sock *tp = tcp_sk(sk); if (!val) { if (sk->sk_state != TCP_CLOSE) return -EINVAL; tp->window_clamp = 0; } else { tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? SOCK_MIN_RCVBUF / 2 : val; tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); } return 0; } /* * Socket option code for TCP. 
*/ int do_tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); int val; int err = 0; /* These are data/string values, all the others are ints */ switch (optname) { case TCP_CONGESTION: { char name[TCP_CA_NAME_MAX]; if (optlen < 1) return -EINVAL; val = strncpy_from_sockptr(name, optval, min_t(long, TCP_CA_NAME_MAX-1, optlen)); if (val < 0) return -EFAULT; name[val] = 0; sockopt_lock_sock(sk); err = tcp_set_congestion_control(sk, name, !has_current_bpf_ctx(), sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)); sockopt_release_sock(sk); return err; } case TCP_ULP: { char name[TCP_ULP_NAME_MAX]; if (optlen < 1) return -EINVAL; val = strncpy_from_sockptr(name, optval, min_t(long, TCP_ULP_NAME_MAX - 1, optlen)); if (val < 0) return -EFAULT; name[val] = 0; sockopt_lock_sock(sk); err = tcp_set_ulp(sk, name); sockopt_release_sock(sk); return err; } case TCP_FASTOPEN_KEY: { __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; __u8 *backup_key = NULL; /* Allow a backup key as well to facilitate key rotation * First key is the active one. */ if (optlen != TCP_FASTOPEN_KEY_LENGTH && optlen != TCP_FASTOPEN_KEY_BUF_LENGTH) return -EINVAL; if (copy_from_sockptr(key, optval, optlen)) return -EFAULT; if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH) backup_key = key + TCP_FASTOPEN_KEY_LENGTH; return tcp_fastopen_reset_cipher(net, sk, key, backup_key); } default: /* fallthru */ break; } if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; /* Handle options that can be set without locking the socket. 
*/ switch (optname) { case TCP_SYNCNT: return tcp_sock_set_syncnt(sk, val); case TCP_USER_TIMEOUT: return tcp_sock_set_user_timeout(sk, val); case TCP_KEEPINTVL: return tcp_sock_set_keepintvl(sk, val); case TCP_KEEPCNT: return tcp_sock_set_keepcnt(sk, val); case TCP_LINGER2: if (val < 0) WRITE_ONCE(tp->linger2, -1); else if (val > TCP_FIN_TIMEOUT_MAX / HZ) WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); else WRITE_ONCE(tp->linger2, val * HZ); return 0; case TCP_DEFER_ACCEPT: /* Translate value in seconds to number of retransmits */ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ)); return 0; } sockopt_lock_sock(sk); switch (optname) { case TCP_MAXSEG: /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { err = -EINVAL; break; } tp->rx_opt.user_mss = val; break; case TCP_NODELAY: __tcp_sock_set_nodelay(sk, val); break; case TCP_THIN_LINEAR_TIMEOUTS: if (val < 0 || val > 1) err = -EINVAL; else tp->thin_lto = val; break; case TCP_THIN_DUPACK: if (val < 0 || val > 1) err = -EINVAL; break; case TCP_REPAIR: if (!tcp_can_repair_sock(sk)) err = -EPERM; else if (val == TCP_REPAIR_ON) { tp->repair = 1; sk->sk_reuse = SK_FORCE_REUSE; tp->repair_queue = TCP_NO_QUEUE; } else if (val == TCP_REPAIR_OFF) { tp->repair = 0; sk->sk_reuse = SK_NO_REUSE; tcp_send_window_probe(sk); } else if (val == TCP_REPAIR_OFF_NO_WP) { tp->repair = 0; sk->sk_reuse = SK_NO_REUSE; } else err = -EINVAL; break; case TCP_REPAIR_QUEUE: if (!tp->repair) err = -EPERM; else if ((unsigned int)val < TCP_QUEUES_NR) tp->repair_queue = val; else err = -EINVAL; break; case TCP_QUEUE_SEQ: if (sk->sk_state != TCP_CLOSE) { err = -EPERM; } else if (tp->repair_queue == TCP_SEND_QUEUE) { if (!tcp_rtx_queue_empty(sk)) err = -EPERM; else WRITE_ONCE(tp->write_seq, val); } else if 
(tp->repair_queue == TCP_RECV_QUEUE) { if (tp->rcv_nxt != tp->copied_seq) { err = -EPERM; } else { WRITE_ONCE(tp->rcv_nxt, val); WRITE_ONCE(tp->copied_seq, val); } } else { err = -EINVAL; } break; case TCP_REPAIR_OPTIONS: if (!tp->repair) err = -EINVAL; else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent) err = tcp_repair_options_est(sk, optval, optlen); else err = -EPERM; break; case TCP_CORK: __tcp_sock_set_cork(sk, val); break; case TCP_KEEPIDLE: err = tcp_sock_set_keepidle_locked(sk, val); break; case TCP_SAVE_SYN: /* 0: disable, 1: enable, 2: start from ether_header */ if (val < 0 || val > 2) err = -EINVAL; else tp->save_syn = val; break; case TCP_WINDOW_CLAMP: err = tcp_set_window_clamp(sk, val); break; case TCP_QUICKACK: __tcp_sock_set_quickack(sk, val); break; case TCP_AO_REPAIR: err = tcp_ao_set_repair(sk, optval, optlen); break; #ifdef CONFIG_TCP_AO case TCP_AO_ADD_KEY: case TCP_AO_DEL_KEY: case TCP_AO_INFO: { /* If this is the first TCP-AO setsockopt() on the socket, * sk_state has to be LISTEN or CLOSE. Allow TCP_REPAIR * in any state. 
*/ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto ao_parse; if (rcu_dereference_protected(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk))) goto ao_parse; if (tp->repair) goto ao_parse; err = -EISCONN; break; ao_parse: err = tp->af_specific->ao_parse(sk, optname, optval, optlen); break; } #endif #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { tcp_fastopen_init_key_once(net); fastopen_queue_tune(sk, val); } else { err = -EINVAL; } break; case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else err = -EINVAL; } else { err = -EOPNOTSUPP; } break; case TCP_FASTOPEN_NO_COOKIE: if (val > 1 || val < 0) err = -EINVAL; else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) err = -EINVAL; else tp->fastopen_no_cookie = val; break; case TCP_TIMESTAMP: if (!tp->repair) { err = -EPERM; break; } /* val is an opaque field, * and low order bit contains usec_ts enable bit. * Its a best effort, and we do not care if user makes an error. 
*/ tp->tcp_usec_ts = val & 1; WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts)); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); break; case TCP_NOTSENT_LOWAT: WRITE_ONCE(tp->notsent_lowat, val); sk->sk_write_space(sk); break; case TCP_INQ: if (val > 1 || val < 0) err = -EINVAL; else tp->recvmsg_inq = val; break; case TCP_TX_DELAY: if (val) tcp_enable_tx_delay(); WRITE_ONCE(tp->tcp_tx_delay, val); break; default: err = -ENOPROTOOPT; break; } sockopt_release_sock(sk); return err; } int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) { u64 stats[__TCP_CHRONO_MAX], total = 0; enum tcp_chrono i; for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { stats[i] = tp->chrono_stat[i - 1]; if (i == tp->chrono_type) stats[i] += tcp_jiffies32 - tp->chrono_start; stats[i] *= USEC_PER_SEC / HZ; total += stats[i]; } info->tcpi_busy_time = total; info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED]; info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED]; } /* Return information about state of tcp endpoint in API format. 
*/
/*
 * @info is zeroed first and stays zeroed when @sk is not SOCK_STREAM.
 *
 * State, pacing rates, reordering and cwnd are reported without taking
 * the socket lock. For a socket in TCP_LISTEN the function stops there,
 * after storing the accept backlog figures in tcpi_unacked/tcpi_sacked.
 * For every other state the remaining fields are sampled between
 * lock_sock_fast() and unlock_sock_fast().
 */
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
	const struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned long rate;
	u32 now;
	u64 rate64;
	bool slow;

	memset(info, 0, sizeof(*info));
	if (sk->sk_type != SOCK_STREAM)
		return;

	info->tcpi_state = inet_sk_state_load(sk);

	/* Report meaningful fields for all TCP states, including listeners */
	/* An all-ones unsigned long rate is widened to an all-ones u64. */
	rate = READ_ONCE(sk->sk_pacing_rate);
	rate64 = (rate != ~0UL) ? rate : ~0ULL;
	info->tcpi_pacing_rate = rate64;

	rate = READ_ONCE(sk->sk_max_pacing_rate);
	rate64 = (rate != ~0UL) ? rate : ~0ULL;
	info->tcpi_max_pacing_rate = rate64;

	info->tcpi_reordering = tp->reordering;
	info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);

	if (info->tcpi_state == TCP_LISTEN) {
		/* listeners aliased fields :
		 * tcpi_unacked -> Number of children ready for accept()
		 * tcpi_sacked  -> max backlog
		 */
		info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
		info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
		return;
	}

	slow = lock_sock_fast(sk);

	info->tcpi_ca_state = icsk->icsk_ca_state;
	info->tcpi_retransmits = icsk->icsk_retransmits;
	info->tcpi_probes = icsk->icsk_probes_out;
	info->tcpi_backoff = icsk->icsk_backoff;

	/* Translate negotiated/observed features into TCPI_OPT_* bits. */
	if (tp->rx_opt.tstamp_ok)
		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
	if (tcp_is_sack(tp))
		info->tcpi_options |= TCPI_OPT_SACK;
	if (tp->rx_opt.wscale_ok) {
		info->tcpi_options |= TCPI_OPT_WSCALE;
		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
	}

	if (tp->ecn_flags & TCP_ECN_OK)
		info->tcpi_options |= TCPI_OPT_ECN;
	if (tp->ecn_flags & TCP_ECN_SEEN)
		info->tcpi_options |= TCPI_OPT_ECN_SEEN;
	if (tp->syn_data_acked)
		info->tcpi_options |= TCPI_OPT_SYN_DATA;
	if (tp->tcp_usec_ts)
		info->tcpi_options |= TCPI_OPT_USEC_TS;

	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
	/* The reported ato is capped at tcp_delack_max(sk). */
	info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
						tcp_delack_max(sk)));
	info->tcpi_snd_mss = tp->mss_cache;
	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;

	info->tcpi_unacked = tp->packets_out;
	info->tcpi_sacked = tp->sacked_out;

	info->tcpi_lost = tp->lost_out;
	info->tcpi_retrans = tp->retrans_out;

	/* The "last ..." fields are ages relative to one tcp_jiffies32 sample. */
	now = tcp_jiffies32;
	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);

	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
	info->tcpi_rtt = tp->srtt_us >> 3;
	info->tcpi_rttvar = tp->mdev_us >> 2;
	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
	info->tcpi_advmss = tp->advmss;

	info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
	info->tcpi_rcv_space = tp->rcvq_space.space;

	info->tcpi_total_retrans = tp->total_retrans;

	info->tcpi_bytes_acked = tp->bytes_acked;
	info->tcpi_bytes_received = tp->bytes_received;
	/* The sequence difference is taken as an int and floored at 0. */
	info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
	tcp_get_info_chrono_stats(tp, info);

	info->tcpi_segs_out = tp->segs_out;

	/* segs_in and data_segs_in can be updated from tcp_segs_in() from BH */
	info->tcpi_segs_in = READ_ONCE(tp->segs_in);
	info->tcpi_data_segs_in = READ_ONCE(tp->data_segs_in);

	info->tcpi_min_rtt = tcp_min_rtt(tp);
	info->tcpi_data_segs_out = tp->data_segs_out;

	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
	/* tcpi_delivery_rate keeps its memset() zero when the computed rate is 0. */
	rate64 = tcp_compute_delivery_rate(tp);
	if (rate64)
		info->tcpi_delivery_rate = rate64;
	info->tcpi_delivered = tp->delivered;
	info->tcpi_delivered_ce = tp->delivered_ce;
	info->tcpi_bytes_sent = tp->bytes_sent;
	info->tcpi_bytes_retrans = tp->bytes_retrans;
	info->tcpi_dsack_dups = tp->dsack_dups;
	info->tcpi_reord_seen = tp->reord_seen;
	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
	info->tcpi_snd_wnd = tp->snd_wnd;
	info->tcpi_rcv_wnd = tp->rcv_wnd;
	info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
	info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;

	info->tcpi_total_rto = tp->total_rto;
	info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
	info->tcpi_total_rto_time = tp->total_rto_time;
	/* A non-zero rto_stamp adds the time elapsed since that stamp. */
	if (tp->rto_stamp)
		info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;

	unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);

/*
 * Sum of the netlink attribute sizes, one term per TCP_NLA_* attribute
 * named in the trailing comments. tcp_get_timestamping_opt_stats() uses
 * the result as its alloc_skb() size.
 */
static size_t tcp_opt_stats_get_size(void)
{
	return
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BUSY */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_RWND_LIMITED */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_SNDBUF_LIMITED */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DATA_SEGS_OUT */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_TOTAL_RETRANS */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_PACING_RATE */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DELIVERY_RATE */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_CWND */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORDERING */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_MIN_RTT */
		nla_total_size(sizeof(u8)) + /* TCP_NLA_RECUR_RETRANS */
		nla_total_size(sizeof(u8)) + /* TCP_NLA_DELIVERY_RATE_APP_LMT */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_SNDQ_SIZE */
		nla_total_size(sizeof(u8)) + /* TCP_NLA_CA_STATE */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
		nla_total_size_64bit(sizeof(u64)) +
/* TCP_NLA_BYTES_RETRANS */ nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */ nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */ nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */ nla_total_size(sizeof(u32)) + /* TCP_NLA_REHASH */ 0; } /* Returns TTL or hop limit of an incoming packet from skb. */ static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) return ip_hdr(skb)->ttl; else if (skb->protocol == htons(ETH_P_IPV6)) return ipv6_hdr(skb)->hop_limit; else return 0; } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, const struct sk_buff *orig_skb, const struct sk_buff *ack_skb) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; struct tcp_info info; unsigned long rate; u64 rate64; stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC); if (!stats) return NULL; tcp_get_info_chrono_stats(tp, &info); nla_put_u64_64bit(stats, TCP_NLA_BUSY, info.tcpi_busy_time, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED, info.tcpi_rwnd_limited, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, info.tcpi_sndbuf_limited, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT, tp->data_segs_out, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, tp->total_retrans, TCP_NLA_PAD); rate = READ_ONCE(sk->sk_pacing_rate); rate64 = (rate != ~0UL) ? 
rate : ~0ULL; nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD); rate64 = tcp_compute_delivery_rate(tp); nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp)); nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, TCP_NLA_PAD); nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, max_t(int, 0, tp->write_seq - tp->snd_nxt)); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, TCP_NLA_PAD); if (ack_skb) nla_put_u8(stats, TCP_NLA_TTL, tcp_skb_ttl_or_hop_limit(ack_skb)); nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash); return stats; } int do_tcp_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); int val, len; if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; if (tp->rx_opt.user_mss && ((1 << 
sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
			val = tp->rx_opt.user_mss;
		/* In repair mode the MSS clamp is reported instead. */
		if (tp->repair)
			val = tp->rx_opt.mss_clamp;
		break;
	case TCP_NODELAY:
		val = !!(tp->nonagle&TCP_NAGLE_OFF);
		break;
	case TCP_CORK:
		val = !!(tp->nonagle&TCP_NAGLE_CORK);
		break;
	case TCP_KEEPIDLE:
		val = keepalive_time_when(tp) / HZ;
		break;
	case TCP_KEEPINTVL:
		val = keepalive_intvl_when(tp) / HZ;
		break;
	case TCP_KEEPCNT:
		val = keepalive_probes(tp);
		break;
	case TCP_SYNCNT:
		/* A zero per-socket value falls back to the per-netns sysctl. */
		val = READ_ONCE(icsk->icsk_syn_retries) ? :
			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
		break;
	case TCP_LINGER2:
		/* Negative values are returned as-is; 0 uses the sysctl default. */
		val = READ_ONCE(tp->linger2);
		if (val >= 0)
			val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
		break;
	case TCP_DEFER_ACCEPT:
		val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
		val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
				      TCP_RTO_MAX / HZ);
		break;
	case TCP_WINDOW_CLAMP:
		val = tp->window_clamp;
		break;
	case TCP_INFO: {
		struct tcp_info info;

		/*
		 * Re-read the caller's length: the result can be larger than
		 * an int, so the copy is bounded by sizeof(info) instead.
		 */
		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;

		tcp_get_info(sk, &info);

		len = min_t(unsigned int, len, sizeof(info));
		if (copy_to_sockptr(optlen, &len, sizeof(int)))
			return -EFAULT;
		if (copy_to_sockptr(optval, &info, len))
			return -EFAULT;
		return 0;
	}
	case TCP_CC_INFO: {
		const struct tcp_congestion_ops *ca_ops;
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;

		/* sz stays 0 when there is no get_info hook, so nothing is copied. */
		ca_ops = icsk->icsk_ca_ops;
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ~0U, &attr, &info);

		len = min_t(unsigned int, len, sz);
		if (copy_to_sockptr(optlen, &len, sizeof(int)))
			return -EFAULT;
		if (copy_to_sockptr(optval, &info, len))
			return -EFAULT;
		return 0;
	}
	case TCP_QUICKACK:
		val = !inet_csk_in_pingpong_mode(sk);
		break;

	case TCP_CONGESTION:
		/* Copies up to TCP_CA_NAME_MAX bytes of the congestion-ops name. */
		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;
		len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
		if (copy_to_sockptr(optlen, &len, sizeof(int)))
			return -EFAULT;
		if (copy_to_sockptr(optval, icsk->icsk_ca_ops->name, len))
			return -EFAULT;
		return 0;

	case TCP_ULP:
		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;
		len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
		/* Without ULP ops, report a zero length and copy no name. */
		if (!icsk->icsk_ulp_ops) {
			len = 0;
			if (copy_to_sockptr(optlen, &len, sizeof(int)))
				return -EFAULT;
			return 0;
		}
		if (copy_to_sockptr(optlen, &len, sizeof(int)))
			return -EFAULT;
		if (copy_to_sockptr(optval, icsk->icsk_ulp_ops->name, len))
			return -EFAULT;
		return 0;

	case TCP_FASTOPEN_KEY: {
		u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)];
		unsigned int key_len;

		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;

		/* tcp_fastopen_get_cipher()'s return value is scaled by the key length. */
		key_len = tcp_fastopen_get_cipher(net, icsk, key) *
				TCP_FASTOPEN_KEY_LENGTH;
		len = min_t(unsigned int, len, key_len);
		if (copy_to_sockptr(optlen, &len, sizeof(int)))
			return -EFAULT;
		if (copy_to_sockptr(optval, key, len))
			return -EFAULT;
		return 0;
	}
	case TCP_THIN_LINEAR_TIMEOUTS:
		val = tp->thin_lto;
		break;

	case TCP_THIN_DUPACK:
		/* Always reported as 0. */
		val = 0;
		break;

	case TCP_REPAIR:
		val = tp->repair;
		break;

	case TCP_REPAIR_QUEUE:
		if (tp->repair)
			val = tp->repair_queue;
		else
			return -EINVAL;
		break;

	case TCP_REPAIR_WINDOW: {
		struct tcp_repair_window opt;

		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;

		/* The caller's buffer must be exactly sizeof(opt). */
		if (len != sizeof(opt))
			return -EINVAL;

		if (!tp->repair)
			return -EPERM;

		opt.snd_wl1	= tp->snd_wl1;
		opt.snd_wnd	= tp->snd_wnd;
		opt.max_window	= tp->max_window;
		opt.rcv_wnd	= tp->rcv_wnd;
		opt.rcv_wup	= tp->rcv_wup;

		if (copy_to_sockptr(optval, &opt, len))
			return -EFAULT;
		return 0;
	}
	case TCP_QUEUE_SEQ:
		/* The sequence returned depends on the selected repair queue. */
		if (tp->repair_queue == TCP_SEND_QUEUE)
			val = tp->write_seq;
		else if (tp->repair_queue == TCP_RECV_QUEUE)
			val = tp->rcv_nxt;
		else
			return -EINVAL;
		break;

	case TCP_USER_TIMEOUT:
		val = READ_ONCE(icsk->icsk_user_timeout);
		break;

	case TCP_FASTOPEN:
		val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
		break;

	case TCP_FASTOPEN_CONNECT:
		val = tp->fastopen_connect;
		break;

	case TCP_FASTOPEN_NO_COOKIE:
		val = tp->fastopen_no_cookie;
		break;

	case TCP_TX_DELAY:
		val = READ_ONCE(tp->tcp_tx_delay);
		break;

	case TCP_TIMESTAMP:
		val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset);
		/* The low-order bit of the result mirrors tp->tcp_usec_ts. */
		if (tp->tcp_usec_ts)
			val |= 1;
		else
			val &= ~1;
		break;
	case TCP_NOTSENT_LOWAT:
		val = READ_ONCE(tp->notsent_lowat);
		break;
	case TCP_INQ:
		val = tp->recvmsg_inq;
		break;
	case TCP_SAVE_SYN:
		val = tp->save_syn;
		break;
	case TCP_SAVED_SYN: {
		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;

		sockopt_lock_sock(sk);
		if (tp->saved_syn) {
			/*
			 * Buffer too small: write the required length back
			 * through optlen and fail with -EINVAL.
			 */
			if (len < tcp_saved_syn_len(tp->saved_syn)) {
				len = tcp_saved_syn_len(tp->saved_syn);
				if (copy_to_sockptr(optlen, &len, sizeof(int))) {
					sockopt_release_sock(sk);
					return -EFAULT;
				}
				sockopt_release_sock(sk);
				return -EINVAL;
			}
			len = tcp_saved_syn_len(tp->saved_syn);
			if (copy_to_sockptr(optlen, &len, sizeof(int))) {
				sockopt_release_sock(sk);
				return -EFAULT;
			}
			if (copy_to_sockptr(optval, tp->saved_syn->data, len)) {
				sockopt_release_sock(sk);
				return -EFAULT;
			}
			/* The saved SYN is freed once it has been copied out. */
			tcp_saved_syn_free(tp);
			sockopt_release_sock(sk);
		} else {
			/* Nothing saved: report a zero length. */
			sockopt_release_sock(sk);
			len = 0;
			if (copy_to_sockptr(optlen, &len, sizeof(int)))
				return -EFAULT;
		}
		return 0;
	}
#ifdef CONFIG_MMU
	case TCP_ZEROCOPY_RECEIVE: {
		struct scm_timestamping_internal tss;
		struct tcp_zerocopy_receive zc = {};
		int err;

		if (copy_from_sockptr(&len, optlen, sizeof(int)))
			return -EFAULT;
		/* The caller's struct must reach at least through 'length'. */
		if (len < 0 ||
		    len < offsetofend(struct tcp_zerocopy_receive, length))
			return -EINVAL;
		/*
		 * A caller struct larger than ours is accepted only when
		 * check_zeroed_sockptr() does not reject the extra bytes;
		 * len is then truncated to sizeof(zc) and written back.
		 */
		if (unlikely(len > sizeof(zc))) {
			err = check_zeroed_sockptr(optval, sizeof(zc),
						   len - sizeof(zc));
			if (err < 1)
				return err == 0 ? -EINVAL : err;
			len = sizeof(zc);
			if (copy_to_sockptr(optlen, &len, sizeof(int)))
				return -EFAULT;
		}
		if (copy_from_sockptr(&zc, optval, len))
			return -EFAULT;
		if (zc.reserved)
			return -EINVAL;
		if (zc.msg_flags & ~(TCP_VALID_ZC_MSG_FLAGS))
			return -EINVAL;
		sockopt_lock_sock(sk);
		err = tcp_zerocopy_receive(sk, &zc, &tss);
		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
							  &zc, &len, err);
		sockopt_release_sock(sk);
		/*
		 * Choose how many trailing fields to fill in from the size
		 * of the struct the caller passed. The labels below fall
		 * through, so entering at an earlier label fills more fields.
		 */
		if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
			goto zerocopy_rcv_cmsg;
		switch (len) {
		case offsetofend(struct tcp_zerocopy_receive, msg_flags):
			goto zerocopy_rcv_cmsg;
		case offsetofend(struct tcp_zerocopy_receive, msg_controllen):
		case offsetofend(struct tcp_zerocopy_receive, msg_control):
		case offsetofend(struct tcp_zerocopy_receive, flags):
		case offsetofend(struct tcp_zerocopy_receive, copybuf_len):
		case offsetofend(struct tcp_zerocopy_receive, copybuf_address):
		case offsetofend(struct tcp_zerocopy_receive, err):
			goto zerocopy_rcv_sk_err;
		case offsetofend(struct tcp_zerocopy_receive, inq):
			goto zerocopy_rcv_inq;
		case offsetofend(struct tcp_zerocopy_receive, length):
		default:
			goto zerocopy_rcv_out;
		}
zerocopy_rcv_cmsg:
		if (zc.msg_flags & TCP_CMSG_TS)
			tcp_zc_finalize_rx_tstamp(sk, &zc, &tss);
		else
			zc.msg_flags = 0;
zerocopy_rcv_sk_err:
		if (!err)
			zc.err = sock_error(sk);
zerocopy_rcv_inq:
		zc.inq = tcp_inq_hint(sk);
zerocopy_rcv_out:
		if (!err && copy_to_sockptr(optval, &zc, len))
			err = -EFAULT;
		return err;
	}
#endif
	case TCP_AO_REPAIR:
		return tcp_ao_get_repair(sk, optval, optlen);
	case TCP_AO_GET_KEYS:
	case TCP_AO_INFO: {
		int err;

		/* Both TCP-AO queries run with the socket locked. */
		sockopt_lock_sock(sk);
		if (optname == TCP_AO_GET_KEYS)
			err = tcp_ao_get_mkts(sk, optval, optlen);
		else
			err = tcp_ao_get_sock_info(sk, optval, optlen);
		sockopt_release_sock(sk);

		return err;
	}
	default:
		return -ENOPROTOOPT;
	}

	/* Shared copy-out for the options that only set 'val'. */
	if (copy_to_sockptr(optlen, &len, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, len))
		return -EFAULT;
	return 0;
}

bool tcp_bpf_bypass_getsockopt(int level, int optname)
{
	/*
TCP do_tcp_getsockopt has optimized getsockopt implementation * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE. */ if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE) return true; return false; } EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname, optval, optlen); return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG int tcp_md5_sigpool_id = -1; EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id); int tcp_md5_alloc_sigpool(void) { size_t scratch_size; int ret; scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr); ret = tcp_sigpool_alloc_ahash("md5", scratch_size); if (ret >= 0) { /* As long as any md5 sigpool was allocated, the return * id would stay the same. Re-write the id only for the case * when previously all MD5 keys were deleted and this call * allocates the first MD5 key, which may return a different * sigpool id than was used previously. 
*/
		WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */
		return 0;
	}
	return ret;
}

/* Release the sigpool identified by tcp_md5_sigpool_id. */
void tcp_md5_release_sigpool(void)
{
	tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id));
}

/* Take a reference on the sigpool identified by tcp_md5_sigpool_id. */
void tcp_md5_add_sigpool(void)
{
	tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id));
}

/*
 * Feed the bytes of @key into the ahash request held by @hp.
 * Returns the result of crypto_ahash_update().
 */
int tcp_md5_hash_key(struct tcp_sigpool *hp,
		     const struct tcp_md5sig_key *key)
{
	u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
	struct scatterlist sg;

	sg_init_one(&sg, key->key, keylen);
	ahash_request_set_crypt(hp->req, &sg, NULL, keylen);

	/* We use data_race() because tcp_md5_do_add() might change
	 * key->key under us
	 */
	return data_race(crypto_ahash_update(hp->req));
}
EXPORT_SYMBOL(tcp_md5_hash_key);

/*
 * Validate the MD5 signature at @hash_location against the key looked up
 * for (@l3index, @saddr, @family).
 *
 * Returns SKB_NOT_DROPPED_YET when the computed hash matches, otherwise
 * the SKB_DROP_REASON_TCP_MD5* value for the failure; the matching MIB
 * counter is incremented and tcp_hash_fail() is invoked on each failure
 * path.
 */
/* Called with rcu_read_lock() */
enum skb_drop_reason
tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
		     const void *saddr, const void *daddr,
		     int family, int l3index, const __u8 *hash_location)
{
	/* This gets called for each TCP segment that has TCP-MD5 option.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	u8 newhash[16];
	int genhash;

	key = tcp_md5_do_lookup(sk, l3index, saddr, family);

	/* A hash arrived but no key is configured for this peer. */
	if (!key && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		tcp_hash_fail("Unexpected MD5 Hash found", family, skb, "");
		return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
	}

	/* Check the signature.
	 * To support dual stack listeners, we need to handle
	 * IPv4-mapped case.
	 */
	if (family == AF_INET)
		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
	else
		genhash = tp->af_specific->calc_md5_hash(newhash, key,
							 NULL, skb);
	/* Fail when hashing itself failed or the 16-byte digests differ. */
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		if (family == AF_INET) {
			tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d",
				      genhash ? "tcp_v4_calc_md5_hash failed"
				      : "", l3index);
		} else {
			if (genhash) {
				tcp_hash_fail("MD5 Hash failed",
					      AF_INET6, skb, "L3 index %d",
					      l3index);
			} else {
				tcp_hash_fail("MD5 Hash mismatch",
					      AF_INET6, skb, "L3 index %d",
					      l3index);
			}
		}
		return SKB_DROP_REASON_TCP_MD5FAILURE;
	}
	return SKB_NOT_DROPPED_YET;
}
EXPORT_SYMBOL(tcp_inbound_md5_hash);

#endif

/*
 * Move @sk to TCP_CLOSE: count a failed attempt if it was still in
 * SYN_SENT/SYN_RECV, clear the transmit timers, drop a pending fastopen
 * request, mark both directions shut down, then either call the
 * state-change callback or, for a SOCK_DEAD socket, destroy it.
 */
void tcp_done(struct sock *sk)
{
	struct request_sock *req;

	/* We might be called with a new socket, after
	 * inet_csk_prepare_forced_close() has been called
	 * so we can not use lockdep_sock_is_held(sk)
	 */
	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);

	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);

	tcp_set_state(sk, TCP_CLOSE);
	tcp_clear_xmit_timers(sk);
	if (req)
		reqsk_fastopen_remove(sk, req, false);

	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
EXPORT_SYMBOL_GPL(tcp_done);

/*
 * Abort the connection behind @sk, storing @err in sk->sk_err.
 * Request sockets (TCP_NEW_SYN_RECV) and timewait sockets are handled by
 * their own early-return paths. Every path visible here returns 0.
 */
int tcp_abort(struct sock *sk, int err)
{
	int state = inet_sk_state_load(sk);

	/* Request socket: drop it from its listener's queue. */
	if (state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);

		local_bh_disable();
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		local_bh_enable();
		return 0;
	}
	/* Timewait socket: take a reference, then deschedule and put it. */
	if (state == TCP_TIME_WAIT) {
		struct inet_timewait_sock *tw = inet_twsk(sk);

		refcount_inc(&tw->tw_refcnt);
		local_bh_disable();
		inet_twsk_deschedule_put(tw);
		local_bh_enable();
		return 0;
	}

	/* BPF context ensures sock locking. */
	if (!has_current_bpf_ctx())
		/* Don't race with userspace socket closes such as tcp_close. */
		lock_sock(sk);

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);
		inet_csk_listen_stop(sk);
	}

	/* Don't race with BH socket closes such as inet_csk_listen_stop.
*/ local_bh_disable(); bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) { WRITE_ONCE(sk->sk_err, err); /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); if (tcp_need_reset(sk->sk_state)) tcp_send_active_reset(sk, GFP_ATOMIC); tcp_done(sk); } bh_unlock_sock(sk); local_bh_enable(); tcp_write_queue_purge(sk); if (!has_current_bpf_ctx()) release_sock(sk); return 0; } EXPORT_SYMBOL_GPL(tcp_abort); extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; static int __init set_thash_entries(char *str) { ssize_t ret; if (!str) return 0; ret = kstrtoul(str, 0, &thash_entries); if (ret) return 0; return 1; } __setup("thash_entries=", set_thash_entries); static void __init tcp_init_mem(void) { unsigned long limit = nr_free_buffer_pages() / 16; limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */ sysctl_tcp_mem[1] = limit; /* 6.25 % */ sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */ } void __init tcp_init(void) { int max_rshare, max_wshare, cnt; unsigned long limit; unsigned int i; BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof_field(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", thash_entries, 21, /* one slot per 2 MB*/ 0, 64 * 1024); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); tcp_hashinfo.bind2_bucket_cachep = kmem_cache_create("tcp_bind2_bucket", sizeof(struct inet_bind2_bucket), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); /* Size and allocate the main established and bind bucket * hash tables. * * The methodology is similar to that of the buffer cache. 
*/
	tcp_hashinfo.ehash =
		alloc_large_system_hash("TCP established",
					sizeof(struct inet_ehash_bucket),
					thash_entries,
					17, /* one slot per 128 KB of memory */
					0,
					NULL,
					&tcp_hashinfo.ehash_mask,
					0,
					thash_entries ? 0 : 512 * 1024);
	/* ehash_mask is the highest valid index, hence the inclusive bound. */
	for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&tcp_hashinfo))
		panic("TCP: failed to alloc ehash_locks");
	/*
	 * One allocation with double-sized buckets backs both bind tables:
	 * bhash2 starts bhash_size entries after bhash.
	 */
	tcp_hashinfo.bhash =
		alloc_large_system_hash("TCP bind",
					2 * sizeof(struct inet_bind_hashbucket),
					tcp_hashinfo.ehash_mask + 1,
					17, /* one slot per 128 KB of memory */
					0,
					&tcp_hashinfo.bhash_size,
					NULL,
					0,
					64 * 1024);
	/* The value stored in bhash_size above is used as a shift count here. */
	tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
	tcp_hashinfo.bhash2 = tcp_hashinfo.bhash + tcp_hashinfo.bhash_size;
	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
		spin_lock_init(&tcp_hashinfo.bhash2[i].lock);
		INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
	}

	tcp_hashinfo.pernet = false;

	/* The orphan limit is half the number of established-hash buckets. */
	cnt = tcp_hashinfo.ehash_mask + 1;
	sysctl_tcp_max_orphans = cnt / 2;

	tcp_init_mem();
	/* Set per-socket limits to no more than 1/128 the pressure threshold */
	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
	max_wshare = min(4UL*1024*1024, limit);
	max_rshare = min(6UL*1024*1024, limit);

	init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
	init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);

	init_net.ipv4.sysctl_tcp_rmem[0] = PAGE_SIZE;
	init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
	init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);

	pr_info("Hash tables configured (established %u bind %u)\n",
		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);

	tcp_v4_init();
	tcp_metrics_init();
	/* Registering the tcp_reno congestion control must not fail. */
	BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
	tcp_tasklet_init();
	mptcp_init();
}
|
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 
1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 | /* SPDX-License-Identifier: GPL-2.0-or-later * * Copyright (C) 2005 David Brownell */ #ifndef __LINUX_SPI_H #define __LINUX_SPI_H #include <linux/acpi.h> #include <linux/bits.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/gpio/consumer.h> #include <linux/kthread.h> #include <linux/mod_devicetable.h> #include <linux/overflow.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> #include <uapi/linux/spi/spi.h> struct dma_chan; struct software_node; struct ptp_system_timestamp; struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; struct spi_controller_mem_caps; struct spi_message; /* * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, * and SPI infrastructure. 
*/ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers * @syncp: seqcount to protect members in this struct for per-cpu update * on 32-bit systems * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled * @errors: number of errors during spi_transfer * @timedout: number of timeouts during spi_transfer * * @spi_sync: number of times spi_sync is used * @spi_sync_immediate: * number of times spi_sync is executed immediately * in calling context without queuing and scheduling * @spi_async: number of times spi_async is used * * @bytes: number of bytes transferred to/from device * @bytes_tx: number of bytes sent to device * @bytes_rx: number of bytes received from device * * @transfer_bytes_histo: * transfer bytes histogram * * @transfers_split_maxsize: * number of transfers that have been split because of * maxsize limit */ struct spi_statistics { struct u64_stats_sync syncp; u64_stats_t messages; u64_stats_t transfers; u64_stats_t errors; u64_stats_t timedout; u64_stats_t spi_sync; u64_stats_t spi_sync_immediate; u64_stats_t spi_async; u64_stats_t bytes; u64_stats_t bytes_rx; u64_stats_t bytes_tx; #define SPI_STATISTICS_HISTO_SIZE 17 u64_stats_t transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; u64_stats_t transfers_split_maxsize; }; #define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count) \ do { \ struct spi_statistics *__lstats; \ get_cpu(); \ __lstats = this_cpu_ptr(pcpu_stats); \ u64_stats_update_begin(&__lstats->syncp); \ u64_stats_add(&__lstats->field, count); \ u64_stats_update_end(&__lstats->syncp); \ put_cpu(); \ } while (0) #define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field) \ do { \ struct spi_statistics *__lstats; \ get_cpu(); \ __lstats = this_cpu_ptr(pcpu_stats); \ u64_stats_update_begin(&__lstats->syncp); \ u64_stats_inc(&__lstats->field); \ u64_stats_update_end(&__lstats->syncp); \ put_cpu(); \ } while (0) /** * struct spi_delay - SPI delay information * @value: 
Value for the delay * @unit: Unit for the delay */ struct spi_delay { #define SPI_DELAY_UNIT_USECS 0 #define SPI_DELAY_UNIT_NSECS 1 #define SPI_DELAY_UNIT_SCK 2 u16 value; u8 unit; }; extern int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer); extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_transfer *xfer); /** * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. * @master: Copy of controller, for backwards compatibility. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. * @chip_select: Chipselect, distinguishing chips handled by @controller. * @mode: The spi mode defines how data is clocked out and in. * This may be changed by the device's driver. * The "active low" default for chipselect mode can be overridden * (by specifying SPI_CS_HIGH) as can the "MSB first" default for * each word in a transfer (by specifying SPI_LSB_FIRST). * @bits_per_word: Data transfers involve one or more words; word sizes * like eight or 12 bits are common. In-memory wordsizes are * powers of two bytes (e.g. 20 bit samples use 32 bits). * This may be changed by the device's driver, or left at the * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. * @rt: Make the pump thread real time priority. * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. 
* @controller_state: Controller's runtime state * @controller_data: Board-specific definitions for controller, such as * FIFO initialization parameters; from board_info.controller_data * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging * @driver_override: If the name of a driver is written to this attribute, then * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. * @cs_gpiod: GPIO descriptor of the chipselect line (optional, NULL when * not using a GPIO line) * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted * @cs_hold: delay to be introduced by the controller before CS is deasserted * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. * @pcpu_statistics: statistics for the spi_device * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. * * In @dev, the platform_data is used to hold information about this * device that's meaningful to the device's protocol driver, but not * to its controller. One example might be an identifier for a chip * variant with slightly different functionality; another might be * information about how this particular board wires the chip's pins. */ struct spi_device { struct device dev; struct spi_controller *controller; struct spi_controller *master; /* Compatibility layer */ u32 max_speed_hz; u8 chip_select; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ #define SPI_NO_RX BIT(30) /* No receive wire */ /* * TPM specification defines flow control over SPI. 
Client device * can insert a wait state on MISO when address is transmitted by * controller on MOSI. Detecting the wait state in software is only * possible for full duplex controllers. For controllers that support * only half-duplex, the wait state detection needs to be implemented * in hardware. TPM devices would set this flag when hardware flow * control is expected from SPI controller. */ #define SPI_TPM_HW_FLOW BIT(29) /* TPM HW flow control */ /* * All bits defined above should be covered by SPI_MODE_KERNEL_MASK. * The SPI_MODE_KERNEL_MASK has the SPI_MODE_USER_MASK counterpart, * which is defined in 'include/uapi/linux/spi/spi.h'. * The bits defined here are from bit 31 downwards, while in * SPI_MODE_USER_MASK are from 0 upwards. * These bits must not overlap. A static assert check should make sure of that. * If adding extra bits, make sure to decrease the bit index below as well. */ #define SPI_MODE_KERNEL_MASK (~(BIT(29) - 1)) u32 mode; int irq; void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; struct gpio_desc *cs_gpiod; /* Chip select GPIO descriptor */ struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: * - memory packing (12 bit samples into low bits, others zeroed) * - priority * - chipselect delays * - ... */ }; /* Make sure that SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK don't overlap */ static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); static inline struct spi_device *to_spi_device(const struct device *dev) { return dev ? 
container_of(dev, struct spi_device, dev) : NULL; } /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) { return (spi && get_device(&spi->dev)) ? spi : NULL; } static inline void spi_dev_put(struct spi_device *spi) { if (spi) put_device(&spi->dev); } /* ctldata is for the bus_controller driver's runtime state */ static inline void *spi_get_ctldata(const struct spi_device *spi) { return spi->controller_state; } static inline void spi_set_ctldata(struct spi_device *spi, void *state) { spi->controller_state = state; } /* Device driver data */ static inline void spi_set_drvdata(struct spi_device *spi, void *data) { dev_set_drvdata(&spi->dev, data); } static inline void *spi_get_drvdata(const struct spi_device *spi) { return dev_get_drvdata(&spi->dev); } static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx) { return spi->chip_select; } static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect) { spi->chip_select = chipselect; } static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx) { return spi->cs_gpiod; } static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod) { spi->cs_gpiod = csgpiod; } /** * struct spi_driver - Host side "protocol" driver * @id_table: List of SPI devices supported by this driver * @probe: Binds this driver to the SPI device. Drivers can verify * that the device is actually present, and may need to configure * characteristics (such as bits_per_word) which weren't needed for * the initial configuration done during system setup. * @remove: Unbinds this driver from the SPI device * @shutdown: Standard shutdown callback used during system state * transitions such as powerdown/halt and kexec * @driver: SPI device drivers should initialize the name and owner * field of this structure. 
* * This represents the kind of device driver that uses SPI messages to * interact with the hardware at the other end of a SPI link. It's called * a "protocol" driver because it works through messages rather than talking * directly to SPI hardware (which is what the underlying SPI controller * driver does to pass those messages). These protocols are defined in the * specification for the device(s) supported by the driver. * * As a rule, those device protocols represent the lowest level interface * supported by a driver, and it will support upper level interfaces too. * Examples of such upper levels include frameworks like MTD, networking, * MMC, RTC, filesystem character device nodes, and hardware monitoring. */ struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; static inline struct spi_driver *to_spi_driver(struct device_driver *drv) { return drv ? container_of(drv, struct spi_driver, driver) : NULL; } extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv); /** * spi_unregister_driver - reverse effect of spi_register_driver * @sdrv: the driver to unregister * Context: can sleep */ static inline void spi_unregister_driver(struct spi_driver *sdrv) { if (sdrv) driver_unregister(&sdrv->driver); } extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); /* Use a define to avoid include chaining to get THIS_MODULE */ #define spi_register_driver(driver) \ __spi_register_driver(THIS_MODULE, driver) /** * module_spi_driver() - Helper macro for registering a SPI driver * @__spi_driver: spi_driver struct * * Helper macro for SPI drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. 
Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_spi_driver(__spi_driver) \ module_driver(__spi_driver, spi_register_driver, \ spi_unregister_driver) /** * struct spi_controller - interface to SPI master or slave controller * @dev: device interface to this driver * @list: link with the global spi_controller list * @bus_num: board-specific (and often SOC-specific) identifier for a * given SPI controller. * @num_chipselect: chipselects are used to distinguish individual * SPI slaves, and are numbered from zero to num_chipselects. * each slave has a chipselect signal, but it's common that not * every chipselect is connected to a slave. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @buswidth_override_bits: flags to override for this controller driver * @bits_per_word_mask: A mask indicating which values of bits_per_word are * supported by the driver. Bit n indicates that a bits_per_word n+1 is * supported. If set, the SPI core will reject any transfer with an * unsupported bits_per_word. If not set, this value is simply ignored, * and it's up to the individual driver to perform any validation. * @min_speed_hz: Lowest supported transfer speed * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller * @target: indicates that this is an SPI target controller * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @max_message_size: function that returns the max message size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. 
* @io_mutex: mutex for physical bus access * @add_lock: mutex to avoid adding devices to the same chipselect * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. * It's always safe to call this unless transfers are pending on * the device whose settings are being modified. * @set_cs_timing: optional hook for SPI devices to request SPI master * controller for configuring specific CS setup time, hold time and inactive * delay in terms of clock counts * @transfer: adds a message to the controller's transfer queue. * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA * @dma_map_dev: device which can be used for DMA mapping * @cur_rx_dma_dev: device which is currently used for RX DMA mapping * @cur_tx_dma_dev: device which is currently used for TX DMA mapping * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message * @cur_msg_incomplete: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to check if the driver has * already called spi_finalize_current_message(). * @cur_msg_need_completion: Flag used internally to opportunistically skip * the @cur_msg_completion. 
This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA * @last_cs: the last chip_select that is recorded by set_cs, -1 when no chip * is selected * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running * @rt: whether this queue is set to run as a realtime task * @auto_runtime_pm: the core should ensure a runtime PM reference is held * while the hardware is prepared, using the parent * device for the spidev * @max_dma_len: Maximum length of a DMA transfer for the device. * @prepare_transfer_hardware: a message will soon arrive from the queue * so the subsystem requests the driver to prepare the transfer hardware * by issuing this call * @transfer_one_message: the subsystem calls the driver to transfer a single * message while queuing transfers that arrive in the meantime. When the * driver is finished with this message, it must call * spi_finalize_current_message() so the subsystem can issue the next * message * @unprepare_transfer_hardware: there are currently no more messages on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. * @transfer_one: transfer a single spi_transfer. * * - return 0 if the transfer is finished, * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem * can issue the next transfer. 
Note: transfer_one and * transfer_one_message are mutually exclusive; when both * are set, the generic subsystem does not call your * transfer_one callback. * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory-like operations. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. * @max_native_cs: When cs_gpiods is used, and this field is filled in, * spi_register_controller() will validate all native CS (including the * unused native CS) against this value. * @pcpu_statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices * @dummy_tx: dummy transmit buffer for full-duplex devices * @fw_translate_cs: If the boot firmware uses a different numbering scheme * than what Linux expects, this optional hook can be used to translate * between the two. 
* @ptp_sts_supported: If the driver sets this to true, it must provide a * time snapshot in @spi_transfer->ptp_sts as close as possible to the * moment in time when @spi_transfer->ptp_sts_word_pre and * @spi_transfer->ptp_sts_word_post were transmitted. * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals * but not chip select signals. Each device may be configured to use a * different clock rate, since those shared signals are ignored unless * the chip is selected. * * The driver for an SPI controller manages access to those devices through * a queue of spi_message transactions, copying data between CPU memory and * an SPI slave device. For each such message it queues, it calls the * message's completion function when the transaction completes. */ struct spi_controller { struct device dev; struct list_head list; /* * Other than negative (== assign one dynamically), bus_num is fully * board-specific. Usually that simplifies to being SoC-specific. * example: one SoC has three SPI controllers, numbered 0..2, * and one board's schematics might show it using SPI-2. Software * would normally use bus_num=2 for that controller. */ s16 bus_num; /* * Chipselects will be integral to many controllers; some others * might use board-specific GPIOs. */ u16 num_chipselect; /* Some SPI controllers pose alignment requirements on DMAable * buffers; let protocol drivers know about these requirements. 
*/ u16 dma_alignment; /* spi_device.mode flags understood by this controller driver */ u32 mode_bits; /* spi_device.mode flags override flags for this controller */ u32 buswidth_override_bits; /* Bitmask of supported bits_per_word for transfers */ u32 bits_per_word_mask; #define SPI_BPW_MASK(bits) BIT((bits) - 1) #define SPI_BPW_RANGE_MASK(min, max) GENMASK((max) - 1, (min) - 1) /* Limits on transfer speed */ u32 min_speed_hz; u32 max_speed_hz; /* Other constraints relevant to this driver */ u16 flags; #define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* Can't do full duplex */ #define SPI_CONTROLLER_NO_RX BIT(1) /* Can't do buffer read */ #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ #define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; union { /* Flag indicating this is an SPI slave controller */ bool slave; /* Flag indicating this is an SPI target controller */ bool target; }; /* * On some hardware transfer / message size may be constrained * the limit may depend on device transfer settings. */ size_t (*max_transfer_size)(struct spi_device *spi); size_t (*max_message_size)(struct spi_device *spi); /* I/O mutex */ struct mutex io_mutex; /* Used to avoid adding the same CS twice */ struct mutex add_lock; /* Lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; /* * Setup mode and clock, etc (SPI driver may call many times). * * IMPORTANT: this may be called when transfers to another * device are active. DO NOT UPDATE SHARED REGISTERS in ways * which could break those transfers. 
*/ int (*setup)(struct spi_device *spi); /* * set_cs_timing() method is for SPI controllers that supports * configuring CS timing. * * This hook allows SPI client drivers to request SPI controllers * to configure specific CS timing through spi_set_cs_timing() after * spi_setup(). */ int (*set_cs_timing)(struct spi_device *spi); /* * Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. * + For now there's no remove-from-queue operation, or * any other request management * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, * selecting a chip (for masters), then transferring data * + If there are multiple spi_device children, the i/o queue * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) * * + Chipselect stays active during the entire message * (unless modified by spi_transfer.cs_change != 0). * + The message transfers use clock and SPI mode parameters * previously established by setup() for this device */ int (*transfer)(struct spi_device *spi, struct spi_message *mesg); /* Called on release() to free memory provided by spi_controller */ void (*cleanup)(struct spi_device *spi); /* * Used to enable core support for DMA handling, if can_dma() * exists and returns true then the transfer will be mapped * prior to transfer_one() being called. The driver should * not modify or store xfer and dma_tx and dma_rx must be set * while the device is prepared. */ bool (*can_dma)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *xfer); struct device *dma_map_dev; struct device *cur_rx_dma_dev; struct device *cur_tx_dma_dev; /* * These hooks are for drivers that want to use the generic * controller transfer queueing mechanism. If these are used, the * transfer() function above must NOT be specified by the driver. 
* Over time we expect SPI drivers to be phased over to this API. */ bool queued; struct kthread_worker *kworker; struct kthread_work pump_messages; spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; struct completion cur_msg_completion; bool cur_msg_incomplete; bool cur_msg_need_completion; bool busy; bool running; bool rt; bool auto_runtime_pm; bool cur_msg_mapped; char last_cs; bool last_cs_mode_high; bool fallback; struct completion xfer_completion; size_t max_dma_len; int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); int (*unprepare_transfer_hardware)(struct spi_controller *ctlr); int (*prepare_message)(struct spi_controller *ctlr, struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); union { int (*slave_abort)(struct spi_controller *ctlr); int (*target_abort)(struct spi_controller *ctlr); }; /* * These hooks are for drivers that use a generic implementation * of transfer_one_message() provided by the core. */ void (*set_cs)(struct spi_device *spi, bool enable); int (*transfer_one)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *transfer); void (*handle_err)(struct spi_controller *ctlr, struct spi_message *message); /* Optimized handlers for SPI memory-like operations. 
*/ const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; s8 max_native_cs; /* Statistics */ struct spi_statistics __percpu *pcpu_statistics; /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; struct dma_chan *dma_rx; /* Dummy data for full duplex devices */ void *dummy_rx; void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); /* * Driver sets this field to indicate it is able to snapshot SPI * transfers (needed e.g. for reading the time of POSIX clocks) */ bool ptp_sts_supported; /* Interrupt enable state during PTP system timestamping */ unsigned long irq_flags; /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; bool must_async; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) { return dev_get_drvdata(&ctlr->dev); } static inline void spi_controller_set_devdata(struct spi_controller *ctlr, void *data) { dev_set_drvdata(&ctlr->dev, data); } static inline struct spi_controller *spi_controller_get(struct spi_controller *ctlr) { if (!ctlr || !get_device(&ctlr->dev)) return NULL; return ctlr; } static inline void spi_controller_put(struct spi_controller *ctlr) { if (ctlr) put_device(&ctlr->dev); } static inline bool spi_controller_is_slave(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; } static inline bool spi_controller_is_target(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; } /* PM calls that need to be issued by the driver */ extern int spi_controller_suspend(struct spi_controller *ctlr); extern int spi_controller_resume(struct spi_controller *ctlr); /* Calls the driver make to interact with the message queue */ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr); extern void 
spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); /* Helper calls for driver to timestamp transfer */ void spi_take_timestamp_pre(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); /* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); static inline struct spi_controller *spi_alloc_master(struct device *host, unsigned int size) { return __spi_alloc_controller(host, size, false); } static inline struct spi_controller *spi_alloc_slave(struct device *host, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __spi_alloc_controller(host, size, true); } static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { return __spi_alloc_controller(dev, size, false); } static inline struct spi_controller *spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __spi_alloc_controller(dev, size, true); } struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, unsigned int size) { return __devm_spi_alloc_controller(dev, size, false); } static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __devm_spi_alloc_controller(dev, size, true); } static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { return __devm_spi_alloc_controller(dev, size, false); } static inline struct spi_controller *devm_spi_alloc_target(struct device *dev, unsigned int size) { if 
(!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __devm_spi_alloc_controller(dev, size, true); } extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); #endif /* * SPI resource management while processing a SPI message */ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_message *msg, void *res); /** * struct spi_res - SPI resource management structure * @entry: list entry * @release: release code called prior to freeing this resource * @data: extra data allocated for the specific use-case * * This is based on ideas from devres, but focused on life-cycle * management during spi_message processing. */ struct spi_res { struct list_head entry; spi_res_release_t release; unsigned long long data[]; /* Guarantee ull alignment */ }; /*---------------------------------------------------------------------------*/ /* * I/O INTERFACE between SPI controller and protocol drivers * * Protocol drivers use a queue of spi_messages, each transferring data * between the controller and memory buffers. * * The spi_messages themselves consist of a series of read+write transfer * segments. Those segments always read the same number of bits as they * write; but one or the other is easily ignored by passing a NULL buffer * pointer. (This is unlike most types of I/O API, because SPI hardware * is full duplex.) * * NOTE: Allocation of spi_transfer and spi_message memory is entirely * up to the protocol driver, which guarantees the integrity of both (as * well as the data buffers) for as long as the message is queued. 
*/ /** * struct spi_transfer - a read/write buffer pair * @tx_buf: data to be written (DMA-safe memory), or NULL * @rx_buf: data to be read (DMA-safe memory), or NULL * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @tx_nbits: number of bits used for writing. If 0 the default * (SPI_NBITS_SINGLE) is used. * @rx_nbits: number of bits used for reading. If 0 the default * (SPI_NBITS_SINGLE) is used. * @len: size of rx and tx buffers (in bytes) * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @dummy_data: indicates transfer is dummy bytes transfer. * @cs_off: performs the transfer with chipselect off. * @cs_change: affects chipselect after this transfer completes * @cs_change_delay: delay between cs deassert and assert when * @cs_change is set and @spi_transfer is not the last in @spi_message * @delay: delay to be introduced after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset * within @tx_buf for which the SPI device is requesting that the time * snapshot for this transfer begins. 
Upon completing the SPI transfer, * this value may have changed compared to what was requested, depending * on the available snapshotting resolution (DMA transfer, * @ptp_sts_supported is false, etc). * @ptp_sts_word_post: See @ptp_sts_word_pre. The two can be equal (meaning * that a single byte should be snapshotted). * If the core takes care of the timestamp (if @ptp_sts_supported is false * for this controller), it will set @ptp_sts_word_pre to 0, and * @ptp_sts_word_post to the length of the transfer. This is done * purposefully (instead of setting to spi_transfer->len - 1) to denote * that a transfer-level snapshot taken from within the driver may still * be of higher quality. * @ptp_sts: Pointer to a memory location held by the SPI slave device where a * PTP system timestamp structure may lie. If drivers use PIO or their * hardware has some sort of assist for retrieving exact transfer timing, * they can (and should) assert @ptp_sts_supported and populate this * structure using the ptp_read_system_*ts helper functions. * The timestamp must represent the time at which the SPI slave device has * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. * @timestamped: true if the transfer has been timestamped * @error: Error status logged by SPI controller driver. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. * In some cases, they may also want to provide DMA addresses for * the data being transferred; that may reduce overhead, when the * underlying driver uses DMA. * * If the transmit buffer is NULL, zeroes will be shifted out * while filling @rx_buf. If the receive buffer is NULL, the data * shifted in will be discarded. Only "len" bytes shift out (or in). * It's an error to try to shift out a partial word.
(For example, by * shifting out three bytes with word size of sixteen or twenty bits; * the former uses two bytes per word, the latter uses four bytes.) * * In-memory data values are always in native CPU byte order, translated * from the wire byte order (big-endian except with SPI_LSB_FIRST). So * for example when bits_per_word is sixteen, buffers are 2N bytes long * (@len = 2N) and hold N sixteen bit words in CPU byte order. * * When the word size of the SPI transfer is not a power-of-two multiple * of eight bits, those in-memory words include extra bits. In-memory * words are always seen by protocol drivers as right-justified, so the * undefined (rx) or unused (tx) bits are always the most significant bits. * * All SPI transfers start with the relevant chipselect active. Normally * it stays selected until after the last transfer in a message. Drivers * can affect the chipselect signal using cs_change. * * (i) If the transfer isn't the last one in the message, this flag is * used to make the chipselect briefly go inactive in the middle of the * message. Toggling chipselect in this way may be needed to terminate * a chip command, letting a single spi_message perform all of group of * chip transactions together. * * (ii) When the transfer is the last one in the message, the chip may * stay selected until the next transfer. On multi-device SPI busses * with nothing blocking messages going to other devices, this is just * a performance hint; starting a message to another device deselects * this one. But in other cases, this can be used to ensure correctness. * Some devices need protocol transactions to be built from a series of * spi_message submissions, where the content of one message is determined * by the results of previous messages and where the whole transaction * ends when the chipselect goes inactive. * * When SPI can transfer in 1x,2x or 4x. It can get this transfer information * from device through @tx_nbits and @rx_nbits. 
In Bi-direction, these * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x) * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_transfer { /* * It's okay if tx_buf == rx_buf (right?). * For MicroWire, one buffer must be NULL. * Buffers must work with dma_*map_single() calls, unless * spi_message.is_dma_mapped reports a pre-existing mapping. */ const void *tx_buf; void *rx_buf; unsigned len; #define SPI_TRANS_FAIL_NO_START BIT(0) u16 error; dma_addr_t tx_dma; dma_addr_t rx_dma; struct sg_table tx_sg; struct sg_table rx_sg; unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; unsigned tx_nbits:3; unsigned rx_nbits:3; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; struct spi_delay word_delay; u32 speed_hz; u32 effective_speed_hz; unsigned int ptp_sts_word_pre; unsigned int ptp_sts_word_post; struct ptp_system_timestamp *ptp_sts; struct list_head transfer_list; }; /** * struct spi_message - one multi-segment SPI transaction * @transfers: list of transfer segments in this transaction * @spi: SPI device to which the transaction is queued * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual * addresses for each transfer buffer * @complete: called to report transaction completions * @context: the argument to complete() when it's called * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * 
successful segments * @status: zero for success, else negative errno * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed * @prepared: spi_prepare_message() was called for this message * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" * in the sense that no other spi_message may use that SPI bus until that * sequence completes. On some systems, many such sequences can execute as * a single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages * sent to a given spi_device are always executed in FIFO order. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_message { struct list_head transfers; struct spi_device *spi; unsigned is_dma_mapped:1; /* spi_prepare_message() was called for this message */ bool prepared; /* * REVISIT: we might want a flag affecting the behavior of the * last transfer ... allowing things like "read 16 bit length L" * immediately followed by "read L bytes". Basically imposing * a specific message scheduling algorithm. * * Some controller drivers (message-at-a-time queue processing) * could provide that as their default scheduling algorithm. But * others (with multi-message pipelines) could need a flag to * tell them about such special cases.
*/ /* Completion is reported through a callback */ int status; void (*complete)(void *context); void *context; unsigned frame_length; unsigned actual_length; /* * For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. */ struct list_head queue; void *state; /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; static inline void spi_message_init_no_memset(struct spi_message *m) { INIT_LIST_HEAD(&m->transfers); INIT_LIST_HEAD(&m->resources); } static inline void spi_message_init(struct spi_message *m) { memset(m, 0, sizeof *m); spi_message_init_no_memset(m); } static inline void spi_message_add_tail(struct spi_transfer *t, struct spi_message *m) { list_add_tail(&t->transfer_list, &m->transfers); } static inline void spi_transfer_del(struct spi_transfer *t) { list_del(&t->transfer_list); } static inline int spi_transfer_delay_exec(struct spi_transfer *t) { return spi_delay_exec(&t->delay, t); } /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized * @xfers: An array of SPI transfers * @num_xfers: Number of items in the xfer array * * This function initializes the given spi_message and adds each spi_transfer in * the given array to the message. */ static inline void spi_message_init_with_transfers(struct spi_message *m, struct spi_transfer *xfers, unsigned int num_xfers) { unsigned int i; spi_message_init(m); for (i = 0; i < num_xfers; ++i) spi_message_add_tail(&xfers[i], m); } /* * It's fine to embed message and transaction structures in other data * structures so long as you don't free them while they're in use. 
*/ /* Allocate one zeroed spi_message with @ntrans spi_transfers placed behind it, each already linked onto the message's transfer list; returns NULL if the allocation fails. */ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { struct spi_message_with_transfers { struct spi_message m; struct spi_transfer t[]; } *mwt; unsigned i; mwt = kzalloc(struct_size(mwt, t, ntrans), flags); if (!mwt) return NULL; spi_message_init_no_memset(&mwt->m); for (i = 0; i < ntrans; i++) spi_message_add_tail(&mwt->t[i], &mwt->m); return &mwt->m; } /* Free a message obtained from spi_message_alloc(); the message is the first member of that allocation, so a single kfree() releases the transfers as well. */ static inline void spi_message_free(struct spi_message *m) { kfree(m); } extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); extern int spi_target_abort(struct spi_device *spi); /* Message size limit reported by the controller's max_message_size() hook, or SIZE_MAX when the hook is not set. */ static inline size_t spi_max_message_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (!ctlr->max_message_size) return SIZE_MAX; return ctlr->max_message_size(spi); } /* Transfer size limit from the controller's max_transfer_size() hook (SIZE_MAX when unset), capped at the message size limit. */ static inline size_t spi_max_transfer_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; size_t tr_max = SIZE_MAX; size_t msg_max = spi_max_message_size(spi); if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); /* Transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } /** * spi_is_bpw_supported - Check if bits per word is supported * @spi: SPI device * @bpw: Bits per word * * This function checks to see if the SPI controller supports @bpw. * * Returns: * True if @bpw is supported, false otherwise. */ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) { /* Use the same controller pointer as the size helpers above rather than the compatibility alias */ u32 bpw_mask = spi->controller->bits_per_word_mask; /* 8 bits per word is accepted regardless of the mask; anything else up to 32 must be set in it */ if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) return true; return false; } /** * spi_controller_xfer_timeout - Compute a suitable timeout value * @ctlr: SPI controller * @xfer: Transfer descriptor * * Compute a relevant timeout value for the given transfer.
We derive the time * that it would take on a single data line and take twice this amount of time * with a minimum of 500ms to avoid false positives on loaded systems. * * Returns: Transfer timeout value in milliseconds. */ static inline unsigned int spi_controller_xfer_timeout(struct spi_controller *ctlr, struct spi_transfer *xfer) { return max(xfer->len * 8 * 2 / (xfer->speed_hz / 1000), 500U); } /*---------------------------------------------------------------------------*/ /* SPI transfer replacement methods which make use of spi_res */ struct spi_replaced_transfers; typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, struct spi_message *msg, struct spi_replaced_transfers *res); /** * struct spi_replaced_transfers - structure describing the spi_transfer * replacements that have occurred * so that they can get reverted * @release: some extra release code to get executed prior to * releasing this structure * @extradata: pointer to some extra data if requested or NULL * @replaced_transfers: transfers that have been replaced and which need * to get restored * @replaced_after: the transfer after which the @replaced_transfers * are to get re-inserted * @inserted: number of transfers inserted * @inserted_transfers: array of spi_transfers of array-size @inserted, * that have been replacing replaced_transfers * * Note: that @extradata will point to @inserted_transfers[@inserted] * if some extra allocation is requested, so alignment will be the same * as for spi_transfers. 
*/ struct spi_replaced_transfers { spi_replaced_release_t release; void *extradata; struct list_head replaced_transfers; struct list_head *replaced_after; size_t inserted; struct spi_transfer inserted_transfers[]; }; /*---------------------------------------------------------------------------*/ /* SPI transfer transformation methods */ extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, size_t maxsize, gfp_t gfp); extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, size_t maxwords, gfp_t gfp); /*---------------------------------------------------------------------------*/ /* * All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes. */ extern int spi_sync(struct spi_device *spi, struct spi_message *message); extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message); extern int spi_bus_lock(struct spi_controller *ctlr); extern int spi_bus_unlock(struct spi_controller *ctlr); /** * spi_sync_transfer - synchronous SPI data transfer * @spi: device with which data will be exchanged * @xfers: An array of spi_transfers * @num_xfers: Number of items in the xfer array * Context: can sleep * * Does a synchronous SPI data transfer of the given spi_transfer array. * * For more specific semantics see spi_sync(). * * Return: zero on success, else a negative error code. */ static inline int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, unsigned int num_xfers) { struct spi_message msg; spi_message_init_with_transfers(&msg, xfers, num_xfers); return spi_sync(spi, &msg); } /** * spi_write - SPI synchronous write * @spi: device to which data will be written * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function writes the buffer @buf. * Callable only from contexts that can sleep. 
* * Return: zero on success, else a negative error code. */ static inline int spi_write(struct spi_device *spi, const void *buf, size_t len) { struct spi_transfer t = { .tx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /** * spi_read - SPI synchronous read * @spi: device from which data will be read * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function reads the buffer @buf. * Callable only from contexts that can sleep. * * Return: zero on success, else a negative error code. */ static inline int spi_read(struct spi_device *spi, void *buf, size_t len) { struct spi_transfer t = { .rx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /* This copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx); /** * spi_w8r8 - SPI synchronous 8 bit write followed by 8 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * Callable only from contexts that can sleep. * * Return: the (unsigned) eight bit number returned by the * device, or else a negative error code. */ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) { ssize_t status; u8 result; status = spi_write_then_read(spi, &cmd, 1, &result, 1); /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } /** * spi_w8r16 - SPI synchronous 8 bit write followed by 16 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * The number is returned in wire-order, which is at least sometimes * big-endian. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the * device, or else a negative error code. 
*/ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) { ssize_t status; u16 result; status = spi_write_then_read(spi, &cmd, 1, &result, 2); /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } /** * spi_w8r16be - SPI synchronous 8 bit write followed by 16 bit big-endian read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * This function is similar to spi_w8r16, with the exception that it will * convert the read 16 bit data word from big-endian to native endianness. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the device in CPU * endianness, or else a negative error code. */ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) { ssize_t status; __be16 result; status = spi_write_then_read(spi, &cmd, 1, &result, 2); if (status < 0) return status; return be16_to_cpu(result); } /*---------------------------------------------------------------------------*/ /* * INTERFACE between board init code and SPI infrastructure. * * No SPI driver ever sees these SPI device table segments, but * it's how the SPI core (or adapters that get hotplugged) grows * the driver model tree. * * As a rule, SPI devices can't be probed. Instead, board init code * provides a table listing the devices which are present, with enough * information to bind and set up the device's driver. There's basic * support for non-static configurations too; enough to handle adding * parport adapters, or microcontrollers acting as USB-to-SPI bridges. */ /** * struct spi_board_info - board-specific template for a SPI device * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. * @swnode: Software node for the device. 
* @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. * @irq: Initializes spi_device.irq; depends on how the board is wired. * @max_speed_hz: Initializes spi_device.max_speed_hz; based on limits * from the chip datasheet and board-specific signal quality issues. * @bus_num: Identifies which spi_controller parents the spi_device; unused * by spi_new_device(), and otherwise depends on board wiring. * @chip_select: Initializes spi_device.chip_select; depends on how * the board is wired. * @mode: Initializes spi_device.mode; based on the chip datasheet, board * wiring (some devices support both 3WIRE and standard modes), and * possibly presence of an inverter in the chipselect path. * * When adding new SPI devices to the device tree, these structures serve * as a partial device template. They hold information which can't always * be determined by drivers. Information that probe() can establish (such * as the default transfer wordsize) is not included here. * * These structures are used in two places. Their primary role is to * be stored in tables of board-specific device descriptors, which are * declared early in board initialization and then used (much later) to * populate a controller's device tree after that controller's driver * initializes. A secondary (and atypical) role is as a parameter to a * spi_new_device() call, which happens after those controller drivers * are active in some dynamic board configuration models. */ struct spi_board_info { /* * The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, * IRQ is copied too.
*/ char modalias[SPI_NAME_SIZE]; const void *platform_data; const struct software_node *swnode; void *controller_data; int irq; /* Slower signaling on noisy or low voltage boards */ u32 max_speed_hz; /* * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * * chip_select reflects how this chip is wired to that master; * it's less than num_chipselect. */ u16 bus_num; u16 chip_select; /* * mode becomes spi_device.mode, and is essential for chips * where the default of SPI_CS_HIGH = 0 is wrong. */ u32 mode; /* * ... may need additional spi_device chip config data here. * avoid stuff protocol drivers can set; but include stuff * needed to behave without being bound to a driver: * - quirks like clock rate mattering when not selected */ }; #ifdef CONFIG_SPI extern int spi_register_board_info(struct spi_board_info const *info, unsigned n); #else /* Board init code may ignore whether SPI is configured or not */ static inline int spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } #endif /* * If you're hotplugging an adapter with devices (parport, USB, etc) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). * * You can also use spi_alloc_device() and spi_add_device() to use a two * stage registration sequence for each spi_device. This gives the caller * some more control over the spi_device structure before it is registered, * but requires that caller to initialize fields that would otherwise * be defined using the board info. 
*/ extern struct spi_device * spi_alloc_device(struct spi_controller *ctlr); extern int spi_add_device(struct spi_device *spi); extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); extern const struct spi_device_id * spi_get_device_id(const struct spi_device *sdev); extern const void * spi_get_device_match_data(const struct spi_device *sdev); static inline bool spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) { return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } /* Compatibility layer */ #define spi_master spi_controller #define SPI_MASTER_HALF_DUPLEX SPI_CONTROLLER_HALF_DUPLEX #define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr) #define spi_master_set_devdata(_ctlr, _data) \ spi_controller_set_devdata(_ctlr, _data) #define spi_master_get(_ctlr) spi_controller_get(_ctlr) #define spi_master_put(_ctlr) spi_controller_put(_ctlr) #define spi_master_suspend(_ctlr) spi_controller_suspend(_ctlr) #define spi_master_resume(_ctlr) spi_controller_resume(_ctlr) #define spi_register_master(_ctlr) spi_register_controller(_ctlr) #define devm_spi_register_master(_dev, _ctlr) \ devm_spi_register_controller(_dev, _ctlr) #define spi_unregister_master(_ctlr) spi_unregister_controller(_ctlr) #endif /* __LINUX_SPI_H */ |
6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Runtime locking correctness validator * * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * see Documentation/locking/lockdep-design.rst for more details. */ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H #include <linux/lockdep_types.h> #include <linux/smp.h> #include <asm/percpu.h> struct task_struct; #ifdef CONFIG_LOCKDEP #include <linux/linkage.h> #include <linux/list.h> #include <linux/debug_locks.h> #include <linux/stacktrace.h> static inline void lockdep_copy_map(struct lockdep_map *to, struct lockdep_map *from) { int i; *to = *from; /* * Since the class cache can be modified concurrently we could observe * half pointers (64bit arch using 32bit copy insns). Therefore clear * the caches and take the performance hit. * * XXX it doesn't work well with lockdep_set_class_and_subclass(), since * that relies on cache abuse. */ for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) to->class_cache[i] = NULL; } /* * Every lock has a list of other locks that were taken after it. 
 * We only grow the list, never remove from it:
 */
struct lock_list {
	struct list_head		entry;
	struct lock_class		*class;
	struct lock_class		*links_to;
	const struct lock_trace		*trace;
	u16				distance;
	/* bitmap of different dependencies from head to this */
	u8				dep;
	/* used by BFS to record whether "prev -> this" only has -(*R)-> */
	u8				only_xr;

	/*
	 * The parent field is used to implement breadth-first search, and the
	 * bit 0 is reused to indicate if the lock has been accessed in BFS.
	 */
	struct lock_list		*parent;
};

/**
 * struct lock_chain - lock dependency chain record
 *
 * @irq_context: the same as irq_context in held_lock below
 * @depth:       the number of held locks in this chain
 * @base:        the index in chain_hlocks for this chain
 * @entry:       the collided lock chains in lock_chain hash list
 * @chain_key:   the hash key of this lock_chain
 */
struct lock_chain {
	/*
	 * see BUILD_BUG_ON()s in add_chain_cache()
	 *
	 * The three bitfields below add up to 2 + 6 + 24 = 32 bits, i.e. they
	 * share a single unsigned int.
	 */
	unsigned int			irq_context :  2,
					depth       :  6,
					base	    : 24;
	/* 4 byte hole */
	struct hlist_node		entry;
	u64				chain_key;
};

/* Width of held_lock::class_idx and the resulting number of class indices. */
#define MAX_LOCKDEP_KEYS_BITS		13
#define MAX_LOCKDEP_KEYS		(1UL << MAX_LOCKDEP_KEYS_BITS)
#define INITIAL_CHAIN_KEY		-1

/*
 * Per-acquisition record: one of these describes a single lock instance
 * held by a task.
 */
struct held_lock {
	/*
	 * One-way hash of the dependency chain up to this point. We
	 * hash the hashes step by step as the dependency chain grows.
	 *
	 * We use it for dependency-caching and we skip detection
	 * passes and dependency-updates if there is a cache-hit, so
	 * it is absolutely critical for 100% coverage of the validator
	 * to have a unique key value for every unique dependency path
	 * that can occur in the system, to make a unique hash value
	 * as likely as possible - hence the 64-bit width.
	 *
	 * The task struct holds the current hash value (initialized
	 * with zero), here we store the previous hash value:
	 *
	 * NOTE(review): INITIAL_CHAIN_KEY above is -1, not zero - confirm
	 * which initial value the "initialized with zero" remark refers to.
	 */
	u64				prev_chain_key;
	unsigned long			acquire_ip;
	struct lockdep_map		*instance;
	struct lockdep_map		*nest_lock;
#ifdef CONFIG_LOCK_STAT
	u64 				waittime_stamp;
	u64				holdtime_stamp;
#endif
	/*
	 * class_idx is zero-indexed; it points to the element in
	 * lock_classes this held lock instance belongs to. class_idx is in
	 * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
	 */
	unsigned int			class_idx:MAX_LOCKDEP_KEYS_BITS;
	/*
	 * The lock-stack is unified in that the lock chains of interrupt
	 * contexts nest on top of process context chains, but we 'separate'
	 * the hashes by starting with 0 if we cross into an interrupt
	 * context, and we also do not add cross-context lock
	 * dependencies - the lock usage graph walking covers that area
	 * anyway, and we'd just unnecessarily increase the number of
	 * dependencies otherwise. [Note: hardirq and softirq contexts
	 * are separated from each other too.]
	 *
	 * The following field is used to detect when we cross into an
	 * interrupt context:
	 */
	unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */
	/* class_idx (13) + irq_context (2) + trylock (1) fill the first 16 bits */
	unsigned int trylock:1;						/* 16 bits */

	unsigned int read:2;        /* see lock_acquire() comment */
	unsigned int check:1;       /* see lock_acquire() comment */
	unsigned int hardirqs_off:1;
	unsigned int sync:1;
	/* read (2) + check (1) + hardirqs_off (1) + sync (1) + references (11) = 16 more bits */
	unsigned int references:11;					/* 32 bits */
	unsigned int pin_count;
};

/*
 * Initialization, self-test and debugging-output methods:
 */
extern void lockdep_init(void);
extern void lockdep_reset(void);
extern void lockdep_reset_lock(struct lockdep_map *lock);
extern void lockdep_free_key_range(void *start, unsigned long size);
extern asmlinkage void lockdep_sys_exit(void);
extern void lockdep_set_selftest_task(struct task_struct *task);

extern void lockdep_init_task(struct task_struct *task);

/*
 * Split the recursion counter in two to readily detect 'off' vs recursion.
 */
/*
 * LOCKDEP_OFF is bit 16 of the counter; LOCKDEP_RECURSION_MASK covers the
 * 16 bits below it.
 */
#define LOCKDEP_RECURSION_BITS	16
#define LOCKDEP_OFF		(1U << LOCKDEP_RECURSION_BITS)
#define LOCKDEP_RECURSION_MASK	(LOCKDEP_OFF - 1)

/*
 * lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due
 * to header dependencies.
 *
 * They add/subtract LOCKDEP_OFF to/from current->lockdep_recursion, so they
 * only touch the part of the counter above LOCKDEP_RECURSION_MASK.
 */

#define lockdep_off()					\
do {							\
	current->lockdep_recursion += LOCKDEP_OFF;	\
} while (0)

#define lockdep_on()					\
do {							\
	current->lockdep_recursion -= LOCKDEP_OFF;	\
} while (0)

extern void lockdep_register_key(struct lock_class_key *key);
extern void lockdep_unregister_key(struct lock_class_key *key);

/*
 * These methods are used by specific locking variants (spinlocks,
 * rwlocks, mutexes and rwsems) to pass init/acquire/release events
 * to lockdep:
 */

extern void lockdep_init_map_type(struct lockdep_map *lock, const char *name,
	struct lock_class_key *key, int subclass, u8 inner, u8 outer, u8 lock_type);

/* Same as lockdep_init_map_type() with the lock type fixed to LD_LOCK_NORMAL. */
static inline void
lockdep_init_map_waits(struct lockdep_map *lock, const char *name,
		       struct lock_class_key *key, int subclass, u8 inner, u8 outer)
{
	lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL);
}

/* Same as lockdep_init_map_waits() with the outer wait type set to LD_WAIT_INV. */
static inline void
lockdep_init_map_wait(struct lockdep_map *lock, const char *name,
		      struct lock_class_key *key, int subclass, u8 inner)
{
	lockdep_init_map_waits(lock, name, key, subclass, inner, LD_WAIT_INV);
}

/* Same as lockdep_init_map_wait() with the inner wait type set to LD_WAIT_INV too. */
static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
			     struct lock_class_key *key, int subclass)
{
	lockdep_init_map_wait(lock, name, key, subclass, LD_WAIT_INV);
}

/*
 * Reinitialize a lock key - for cases where there is special locking or
 * special initialization of locks so that the validator gets the scope
 * of dependencies wrong: they are either too broad (they need a class-split)
 * or they are too narrow (they suffer from a false class-split):
 *
 * All of the helpers below re-run lockdep_init_map_type() on (lock)->dep_map
 * and pass the map's current wait_type_inner, wait_type_outer and lock_type
 * back in unchanged; they differ only in the name, key and subclass used.
 */
/* name is the stringified key expression, subclass 0 */
#define lockdep_set_class(lock, key)				\
	lockdep_init_map_type(&(lock)->dep_map, #key, key, 0,	\
			      (lock)->dep_map.wait_type_inner,	\
			      (lock)->dep_map.wait_type_outer,	\
			      (lock)->dep_map.lock_type)

/* caller-supplied name, subclass 0 */
#define lockdep_set_class_and_name(lock, key, name)		\
	lockdep_init_map_type(&(lock)->dep_map, name, key, 0,	\
			      (lock)->dep_map.wait_type_inner,	\
			      (lock)->dep_map.wait_type_outer,	\
			      (lock)->dep_map.lock_type)

/* name is the stringified key expression, caller-supplied subclass */
#define lockdep_set_class_and_subclass(lock, key, sub)		\
	lockdep_init_map_type(&(lock)->dep_map, #key, key, sub,	\
			      (lock)->dep_map.wait_type_inner,	\
			      (lock)->dep_map.wait_type_outer,	\
			      (lock)->dep_map.lock_type)

/* keeps the map's current key; name is the stringified lock expression */
#define lockdep_set_subclass(lock, sub)					\
	lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
			      (lock)->dep_map.wait_type_inner,		\
			      (lock)->dep_map.wait_type_outer,		\
			      (lock)->dep_map.lock_type)

/* switches the lock to the __lockdep_no_validate__ key */
#define lockdep_set_novalidate_class(lock) \
	lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock)

/*
 * Compare locking classes
 */
#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key)

/* Returns non-zero when @lock's key pointer is exactly @key. */
static inline int lockdep_match_key(struct lockdep_map *lock,
				    struct lock_class_key *key)
{
	return lock->key == key;
}

/*
 * Acquire a lock.
 *
 * Values for "read":
 *
 *   0: exclusive (write) acquire
 *   1: read-acquire (no recursion allowed)
 *   2: read-acquire with same-instance recursion allowed
 *
 * Values for check:
 *
 *   0: simple checks (freeing, held-at-exit-time, etc.)
 *   1: full validation
 */
extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
			 int trylock, int read, int check,
			 struct lockdep_map *nest_lock, unsigned long ip);

extern void lock_release(struct lockdep_map *lock, unsigned long ip);

extern void lock_sync(struct lockdep_map *lock, unsigned int subclass,
		      int read, int check, struct lockdep_map *nest_lock,
		      unsigned long ip);

/* lock_is_held_type() returns */
#define LOCK_STATE_UNKNOWN	-1
#define LOCK_STATE_NOT_HELD	0
#define LOCK_STATE_HELD		1

/*
 * Same "read" as for lock_acquire(), except -1 means any.
 */
extern int lock_is_held_type(const struct lockdep_map *lock, int read);

/* Held-state query that accepts any "read" mode (passes -1). */
static inline int lock_is_held(const struct lockdep_map *lock)
{
	return lock_is_held_type(lock, -1);
}

/* Convenience forms taking the lock object instead of its dep_map. */
#define lockdep_is_held(lock)		lock_is_held(&(lock)->dep_map)
#define lockdep_is_held_type(lock, r)	lock_is_held_type(&(lock)->dep_map, (r))

extern void lock_set_class(struct lockdep_map *lock, const char *name,
			   struct lock_class_key *key, unsigned int subclass,
			   unsigned long ip);

#define lock_set_novalidate_class(l, n, i) \
	lock_set_class(l, n, &__lockdep_no_validate__, 0, i)

/* Re-class @lock with its current name and key but a new @subclass. */
static inline void lock_set_subclass(struct lockdep_map *lock,
		unsigned int subclass, unsigned long ip)
{
	lock_set_class(lock, lock->name, lock->key, subclass, ip);
}

extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);

#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }

extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);

/* Reports a depth of 0 whenever debug_locks is clear. */
#define lockdep_depth(tsk)	(debug_locks ? (tsk)->lockdep_depth : 0)

/* The assertions below can only fire while debug_locks is set. */
#define lockdep_assert(cond)		\
	do { WARN_ON(debug_locks && !(cond)); } while (0)

#define lockdep_assert_once(cond)	\
	do { WARN_ON_ONCE(debug_locks && !(cond)); } while (0)

/*
 * Compared against LOCK_STATE_NOT_HELD, so a LOCK_STATE_UNKNOWN result does
 * not trigger the warning.
 */
#define lockdep_assert_held(l)		\
	lockdep_assert(lockdep_is_held(l) != LOCK_STATE_NOT_HELD)

/*
 * Compared against LOCK_STATE_HELD, so a LOCK_STATE_UNKNOWN result does
 * not trigger the warning.
 */
#define lockdep_assert_not_held(l)	\
	lockdep_assert(lockdep_is_held(l) != LOCK_STATE_HELD)

/*
 * The two variants below test the result for truth rather than comparing it
 * with a LOCK_STATE_* value, so any non-zero result satisfies them.
 */
#define lockdep_assert_held_write(l)	\
	lockdep_assert(lockdep_is_held_type(l, 0))

#define lockdep_assert_held_read(l)	\
	lockdep_assert(lockdep_is_held_type(l, 1))

#define lockdep_assert_held_once(l)		\
	lockdep_assert_once(lockdep_is_held(l) != LOCK_STATE_NOT_HELD)

/* Reads current->lockdep_depth directly (not via lockdep_depth()). */
#define lockdep_assert_none_held_once()		\
	lockdep_assert_once(!current->lockdep_depth)

#define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)

#define lockdep_pin_lock(l)	lock_pin_lock(&(l)->dep_map)
#define lockdep_repin_lock(l,c)	lock_repin_lock(&(l)->dep_map, (c))
#define lockdep_unpin_lock(l,c)	lock_unpin_lock(&(l)->dep_map, (c))

/*
 * Must use lock_map_acquire_try() with override maps to avoid
 * lockdep thinking they participate in the block chain.
 */
#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)	\
	struct lockdep_map _name = {			\
		.name = #_name "-wait-type-override",	\
		.wait_type_inner = _wait_type,		\
		.lock_type = LD_LOCK_WAIT_OVERRIDE, }

#else /* !CONFIG_LOCKDEP */

/*
 * Without CONFIG_LOCKDEP the API collapses to empty inlines and no-op
 * macros. Several of the macros still evaluate (void)(name) / (void)(key)
 * so that those arguments count as used.
 *
 * NOTE(review): no lock_sync() stub is provided in this branch, while
 * lock_map_sync() later in this header expands to lock_sync() - confirm
 * every lock_map_sync() user is built only with lockdep enabled.
 */
static inline void lockdep_init_task(struct task_struct *task)
{
}

static inline void lockdep_off(void)
{
}

static inline void lockdep_on(void)
{
}

static inline void lockdep_set_selftest_task(struct task_struct *task)
{
}

# define lock_acquire(l, s, t, r, c, n, i)	do { } while (0)
# define lock_release(l, i)			do { } while (0)
# define lock_downgrade(l, i)			do { } while (0)
# define lock_set_class(l, n, key, s, i)	do { (void)(key); } while (0)
# define lock_set_novalidate_class(l, n, i)	do { } while (0)
# define lock_set_subclass(l, s, i)		do { } while (0)
# define lockdep_init()				do { } while (0)
# define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \
		do { (void)(name); (void)(key); } while (0)
# define lockdep_init_map_waits(lock, name, key, sub, inner, outer) \
		do { (void)(name); (void)(key); } while (0)
# define lockdep_init_map_wait(lock, name, key, sub, inner) \
		do { (void)(name); (void)(key); } while (0)
# define lockdep_init_map(lock, name, key, sub) \
		do { (void)(name); (void)(key); } while (0)
# define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
# define lockdep_set_class_and_name(lock, key, name) \
		do { (void)(key); (void)(name); } while (0)
#define lockdep_set_class_and_subclass(lock, key, sub) \
		do { (void)(key); } while (0)
#define lockdep_set_subclass(lock, sub)		do { } while (0)

#define lockdep_set_novalidate_class(lock) do { } while (0)

/*
 * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP
 * case since the result is not well defined and the caller should rather
 * #ifdef the call himself.
 */

/* Even without lockdep, lockdep_reset() still sets debug_locks back to 1. */
# define lockdep_reset()		do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size)	do { } while (0)
# define lockdep_sys_exit() 			do { } while (0)

static inline void lockdep_register_key(struct lock_class_key *key)
{
}

static inline void lockdep_unregister_key(struct lock_class_key *key)
{
}

#define lockdep_depth(tsk)	(0)

/*
 * Dummy forward declarations, allow users to write less ifdef-y code
 * and depend on dead code elimination.
 */
extern int lock_is_held(const void *);
extern int lockdep_is_held(const void *);
/* constant 1 when lockdep is disabled */
#define lockdep_is_held_type(l, r)		(1)

#define lockdep_assert(c)			do { } while (0)
#define lockdep_assert_once(c)			do { } while (0)

/* The (void)(l) keeps the lock argument referenced even though nothing is checked. */
#define lockdep_assert_held(l)			do { (void)(l); } while (0)
#define lockdep_assert_not_held(l)		do { (void)(l); } while (0)
#define lockdep_assert_held_write(l)		do { (void)(l); } while (0)
#define lockdep_assert_held_read(l)		do { (void)(l); } while (0)
#define lockdep_assert_held_once(l)		do { (void)(l); } while (0)
#define lockdep_assert_none_held_once()	do { } while (0)

#define lockdep_recursing(tsk)			(0)

#define NIL_COOKIE (struct pin_cookie){ }

/* Evaluates to an empty-initialized pin_cookie; the lock argument is not used. */
#define lockdep_pin_lock(l)			({ struct pin_cookie cookie = { }; cookie; })
#define lockdep_repin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
#define lockdep_unpin_lock(l, c)		do { (void)(l); (void)(c); } while (0)

/* Still defines the named map, but empty and marked __maybe_unused. */
#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)	\
	struct lockdep_map __maybe_unused _name = {}

#endif /* !LOCKDEP */

#ifdef CONFIG_PROVE_LOCKING
void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);

/* Forwards the lock's dep_map plus all remaining arguments. */
#define lock_set_cmp_fn(lock, ...)	lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__)
#else
#define lock_set_cmp_fn(lock, ...)	do { } while (0)
#endif

enum xhlock_context_t {
	XHLOCK_HARD,
	XHLOCK_SOFT,
	XHLOCK_CTX_NR,
};

/*
 * To initialize a lockdep_map statically use this macro.
 * Note that _name must not be NULL.
 */
#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
	{ .name = (_name), .key = (void *)(_key), }

/* Empty inlines: defined unconditionally in this header. */
static inline void lockdep_invariant_state(bool force) {}
static inline void lockdep_free_task(struct task_struct *task) {}

#ifdef CONFIG_LOCK_STAT

extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);

/*
 * LOCK_CONTENDED - take @_lock, recording contention for lock statistics
 *
 * @try is attempted first; only if it fails is lock_contended() reported and
 * the blocking @lock operation called. lock_acquired() is reported on every
 * path.
 */
#define LOCK_CONTENDED(_lock, try, lock)			\
do {								\
	if (!try(_lock)) {					\
		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
		lock(_lock);					\
	}							\
	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
} while (0)

/*
 * LOCK_CONTENDED_RETURN - like LOCK_CONTENDED for a @lock that returns a status
 *
 * Evaluates to 0 when @try succeeds, otherwise to the value returned by
 * @lock. lock_acquired() is reported only when that value is 0.
 */
#define LOCK_CONTENDED_RETURN(_lock, try, lock)			\
({								\
	int ____err = 0;					\
	if (!try(_lock)) {					\
		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
		____err = lock(_lock);				\
	}							\
	if (!____err)						\
		lock_acquired(&(_lock)->dep_map, _RET_IP_);	\
	____err;						\
})

#else /* CONFIG_LOCK_STAT */

#define lock_contended(lockdep_map, ip) do {} while (0)
#define lock_acquired(lockdep_map, ip) do {} while (0)

/* Without lock statistics @try is never used; @lock is called directly. */
#define LOCK_CONTENDED(_lock, try, lock) \
	lock(_lock)

#define LOCK_CONTENDED_RETURN(_lock, try, lock) \
	lock(_lock)

#endif /* CONFIG_LOCK_STAT */

#ifdef CONFIG_PROVE_LOCKING
extern void print_irqtrace_events(struct task_struct *curr);
#else
static inline void print_irqtrace_events(struct task_struct *curr)
{
}
#endif

/* Variable used to make lockdep treat read_lock() as recursive in selftests */
#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS
extern unsigned int force_read_lock_recursive;
#else /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */
#define force_read_lock_recursive 0
#endif /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */

#ifdef CONFIG_LOCKDEP
extern bool read_lock_is_recursive(void);
#else /* CONFIG_LOCKDEP */
/* If !LOCKDEP, the value is meaningless */
#define read_lock_is_recursive() 0
#endif

/*
 * For trivial one-depth nesting of a lock-class, the following
 * global define can be used. (Subsystems with multiple levels
 * of nesting should define their own lock-nesting subclasses.)
 */
#define SINGLE_DEPTH_NESTING			1

/*
 * Map the dependency ops to NOP or to real lockdep ops, depending
 * on the per lock-class debug mode:
 *
 * Macro arguments: l = lockdep_map, s = subclass, t = trylock,
 * n = nest_lock, i = ip (see the lock_acquire() prototype).
 */

/* All three pass check=1; they differ only in the "read" value (0, 1, 2). */
#define lock_acquire_exclusive(l, s, t, n, i)		lock_acquire(l, s, t, 0, 1, n, i)
#define lock_acquire_shared(l, s, t, n, i)		lock_acquire(l, s, t, 1, 1, n, i)
#define lock_acquire_shared_recursive(l, s, t, n, i)	lock_acquire(l, s, t, 2, 1, n, i)

#define spin_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
#define spin_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
#define spin_release(l, i)			lock_release(l, i)

#define rwlock_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
/* Picks the recursive or non-recursive shared mode via read_lock_is_recursive(). */
#define rwlock_acquire_read(l, s, t, i)					\
do {									\
	if (read_lock_is_recursive())					\
		lock_acquire_shared_recursive(l, s, t, NULL, i);	\
	else								\
		lock_acquire_shared(l, s, t, NULL, i);			\
} while (0)

#define rwlock_release(l, i)			lock_release(l, i)

#define seqcount_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
#define seqcount_acquire_read(l, s, t, i)	lock_acquire_shared_recursive(l, s, t, NULL, i)
#define seqcount_release(l, i)			lock_release(l, i)

#define mutex_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
#define mutex_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
#define mutex_release(l, i)			lock_release(l, i)

#define rwsem_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
#define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
#define rwsem_acquire_read(l, s, t, i)		lock_acquire_shared(l, s, t, NULL, i)
#define rwsem_release(l, i)			lock_release(l, i)

/* Bare lockdep_map helpers: subclass 0, no nest lock, ip = _THIS_IP_. */
#define lock_map_acquire(l)			lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_try(l)			lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_acquire_read(l)		lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_tryread(l)		lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_release(l)			lock_release(l, _THIS_IP_)
/* lock_sync() with subclass 0, read 0, check 1 and no nest lock. */
#define lock_map_sync(l)			lock_sync(l, 0, 0, 1, NULL, _THIS_IP_)

#ifdef CONFIG_PROVE_LOCKING
/*
 * might_lock*() - annotate that the lock may be taken here
 *
 * Each variant type-checks that (lock)->dep_map is a struct lockdep_map and
 * then issues a lock_acquire() immediately followed by lock_release() on it.
 */
# define might_lock(lock)						\
do {									\
	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
	lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_);	\
	lock_release(&(lock)->dep_map, _THIS_IP_);			\
} while (0)
# define might_lock_read(lock)						\
do {									\
	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
	lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_);	\
	lock_release(&(lock)->dep_map, _THIS_IP_);			\
} while (0)
/*
 * NOTE(review): this passes read=1, the same as might_lock_read(), whereas
 * might_lock() passes read=0 - confirm the shared mode is intended here.
 */
# define might_lock_nested(lock, subclass)				\
do {									\
	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
	lock_acquire(&(lock)->dep_map, subclass, 0, 1, 1, NULL,		\
		     _THIS_IP_);					\
	lock_release(&(lock)->dep_map, _THIS_IP_);			\
} while (0)

DECLARE_PER_CPU(int, hardirqs_enabled);
DECLARE_PER_CPU(int, hardirq_context);
DECLARE_PER_CPU(unsigned int, lockdep_recursion);

/* True when debug_locks is set and this CPU's lockdep_recursion counter is zero. */
#define __lockdep_enabled	(debug_locks && !this_cpu_read(lockdep_recursion))

/*
 * The assertions below warn at most once (WARN_ON_ONCE) and only while
 * __lockdep_enabled holds.
 */
#define lockdep_assert_irqs_enabled()					\
do {									\
	WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirqs_enabled)); \
} while (0)

#define lockdep_assert_irqs_disabled()					\
do {									\
	WARN_ON_ONCE(__lockdep_enabled && this_cpu_read(hardirqs_enabled)); \
} while (0)

#define lockdep_assert_in_irq()						\
do {									\
	WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \
} while (0)

/* Warns when in hardirq context or when hardirqs are not enabled. */
#define lockdep_assert_no_hardirq()					\
do {									\
	WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \
					   !this_cpu_read(hardirqs_enabled))); \
} while (0)

/*
 * The two preemption assertions are additionally gated on
 * IS_ENABLED(CONFIG_PREEMPT_COUNT). "Enabled" requires preempt_count() == 0
 * and hardirqs enabled; "disabled" warns only when both of those hold.
 */
#define lockdep_assert_preemption_enabled()				\
do {									\
	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)	&&		\
		     __lockdep_enabled			&&		\
		     (preempt_count() != 0		||		\
		      !this_cpu_read(hardirqs_enabled)));		\
} while (0)

#define lockdep_assert_preemption_disabled()				\
do {									\
	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)	&&		\
		     __lockdep_enabled			&&		\
		     (preempt_count() == 0		&&		\
		      this_cpu_read(hardirqs_enabled)));		\
} while (0)

/*
 * Acceptable for protecting per-CPU resources accessed from BH.
 * Much like in_softirq() - semantics are ambiguous, use carefully.
 *
 * Warns unless in_softirq() is true and neither in_irq() nor in_nmi() is.
 */
#define lockdep_assert_in_softirq()					\
do {									\
	WARN_ON_ONCE(__lockdep_enabled			&&		\
		     (!in_softirq() || in_irq() || in_nmi()));		\
} while (0)

#else
/* Without CONFIG_PROVE_LOCKING all of these annotations compile to nothing. */
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define might_lock_nested(lock, subclass) do { } while (0)

# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
# define lockdep_assert_no_hardirq() do { } while (0)

# define lockdep_assert_preemption_enabled() do { } while (0)
# define lockdep_assert_preemption_disabled() do { } while (0)
# define lockdep_assert_in_softirq() do { } while (0)
#endif

#ifdef CONFIG_PROVE_RAW_LOCK_NESTING

/*
 * Warns once when, with debug_locks set and current->lockdep_recursion zero,
 * lockdep_hardirq_context() is true but current has neither hardirq_threaded
 * nor irq_config set.
 */
# define lockdep_assert_RT_in_threaded_ctx() do {			\
		WARN_ONCE(debug_locks && !current->lockdep_recursion &&	\
			  lockdep_hardirq_context() &&			\
			  !(current->hardirq_threaded || current->irq_config),	\
			  "Not in threaded context on PREEMPT_RT as expected\n");	\
} while (0)

#else

# define lockdep_assert_RT_in_threaded_ctx() do { } while (0)

#endif

#ifdef CONFIG_LOCKDEP
void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
#else
/* Empty stub so callers need no #ifdef of their own. */
static inline void
lockdep_rcu_suspicious(const char *file, const int line, const char *s)
{
}
#endif

#endif /* __LINUX_LOCKDEP_H */
3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
// SPDX-License-Identifier: GPL-2.0
/*
 * quota.c - CephFS quota
 *
 * Copyright (C) 2017-2018 SUSE
 */

#include <linux/statfs.h>

#include "super.h"
#include "mds_client.h"

/*
 * ceph_adjust_quota_realms_count - bump or drop the quota realm counter
 * @inode: any inode of the filesystem; only its superblock is used
 * @inc:   true to increment mdsc->quotarealms_count, false to decrement it
 */
void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
	if (inc)
		atomic64_inc(&mdsc->quotarealms_count);
	else
		atomic64_dec(&mdsc->quotarealms_count);
}

/*
 * Cheap pre-check used before walking the snaprealm hierarchy: returns false
 * only when it can tell that no quota realm applies to @inode, and true
 * whenever realms are known to exist or the answer cannot be determined.
 */
static inline bool ceph_has_realms_with_quotas(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
	struct inode *root = d_inode(sb->s_root);

	if (atomic64_read(&mdsc->quotarealms_count) > 0)
		return true;
	/* if root is the real CephFS root, we don't have quota realms */
	if (root && ceph_ino(root) == CEPH_INO_ROOT)
		return false;
	/* MDS stray dirs have no quota realms */
	if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
		return false;
	/* otherwise, we can't know for sure */
	return true;
}

/*
 * ceph_handle_quota - apply a quota message received from an MDS
 * @mdsc:    MDS client the message belongs to
 * @session: MDS session the message arrived on
 * @msg:     message whose front holds a struct ceph_mds_quota
 *
 * Looks up the (non-snapshot) inode named in the message and, under
 * i_ceph_lock, copies the recursive byte/file/subdir statistics into it and
 * updates its quota limits via __ceph_update_quota(). Messages that are too
 * short, or that name an inode that cannot be found, are logged and dropped.
 */
void ceph_handle_quota(struct ceph_mds_client *mdsc,
		       struct ceph_mds_session *session,
		       struct ceph_msg *msg)
{
	struct super_block *sb = mdsc->fsc->sb;
	struct ceph_mds_quota *h = msg->front.iov_base;
	struct ceph_client *cl = mdsc->fsc->client;
	struct ceph_vino vino;
	struct inode *inode;
	struct ceph_inode_info *ci;

	/* paired with ceph_dec_mds_stopping_blocker() at "out" */
	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
		return;

	/* validate the length before any field of *h is read */
	if (msg->front.iov_len < sizeof(*h)) {
		pr_err_client(cl, "corrupt message mds%d len %d\n",
			      session->s_mds, (int)msg->front.iov_len);
		ceph_msg_dump(msg);
		goto out;
	}

	/* lookup inode */
	vino.ino = le64_to_cpu(h->ino);
	vino.snap = CEPH_NOSNAP;
	inode = ceph_find_inode(sb, vino);
	if (!inode) {
		pr_warn_client(cl, "failed to find inode %llx\n", vino.ino);
		goto out;
	}
	ci = ceph_inode(inode);

	spin_lock(&ci->i_ceph_lock);
	ci->i_rbytes = le64_to_cpu(h->rbytes);
	ci->i_rfiles = le64_to_cpu(h->rfiles);
	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
		            le64_to_cpu(h->max_files));
	spin_unlock(&ci->i_ceph_lock);

	/* release the inode obtained from ceph_find_inode() above */
	iput(inode);
out:
	ceph_dec_mds_stopping_blocker(mdsc);
}

/*
 * Find the ceph_quotarealm_inode entry for @ino in the
 * mdsc->quotarealms_inodes rb-tree, creating and inserting a fresh entry
 * (no inode, no timeout) when none exists. The whole lookup/insert runs under
 * mdsc->quotarealms_inodes_mutex.
 *
 * Returns the entry, or NULL if a new entry was needed and could not be
 * allocated.
 */
static struct ceph_quotarealm_inode *
find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
{
	struct ceph_quotarealm_inode *qri = NULL;
	struct rb_node **node, *parent = NULL;
	struct ceph_client *cl = mdsc->fsc->client;

	mutex_lock(&mdsc->quotarealms_inodes_mutex);
	node = &(mdsc->quotarealms_inodes.rb_node);
	while (*node) {
		parent = *node;
		qri = container_of(*node, struct ceph_quotarealm_inode, node);

		if (ino < qri->ino)
			node = &((*node)->rb_left);
		else if (ino > qri->ino)
			node = &((*node)->rb_right);
		else
			break;
	}
	/*
	 * After the walk qri is either NULL (empty tree), the matching entry,
	 * or the last entry visited before falling off the tree; only an
	 * exact ino match counts as found.
	 */
	if (!qri || (qri->ino != ino)) {
		/* Not found, create a new one and insert it */
		qri = kmalloc(sizeof(*qri), GFP_KERNEL);
		if (qri) {
			qri->ino = ino;
			qri->inode = NULL;
			qri->timeout = 0;
			mutex_init(&qri->mutex);
			rb_link_node(&qri->node, parent, node);
			rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
		} else
			pr_warn_client(cl, "Failed to alloc quotarealms_inode\n");
	}
	mutex_unlock(&mdsc->quotarealms_inodes_mutex);

	return qri;
}

/*
 * This function will try to lookup a realm inode which isn't visible in the
 * filesystem mountpoint. A list of these kind of inodes (not visible) is
 * maintained in the mdsc and freed only when the filesystem is umounted.
 *
 * Note that these inodes are kept in this list even if the lookup fails, which
 * allows to prevent useless lookup requests.
 */
/*
 * Returns the realm inode on success, NULL when no tracking entry could be
 * obtained or a previous lookup failed within the retry timeout, or an
 * ERR_PTR() when the lookup/getattr attempted by this call fails.
 */
static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
					     struct super_block *sb,
					     struct ceph_snap_realm *realm)
{
	struct ceph_client *cl = mdsc->fsc->client;
	struct ceph_quotarealm_inode *qri;
	struct inode *in;

	qri = find_quotarealm_inode(mdsc, realm->ino);
	if (!qri)
		return NULL;

	mutex_lock(&qri->mutex);
	if (qri->inode && ceph_is_any_caps(qri->inode)) {
		/* A request has already returned the inode */
		mutex_unlock(&qri->mutex);
		return qri->inode;
	}
	/* Check if this inode lookup has failed recently */
	if (qri->timeout &&
	    time_before_eq(jiffies, qri->timeout)) {
		mutex_unlock(&qri->mutex);
		return NULL;
	}
	if (qri->inode) {
		/* get caps */
		int ret = __ceph_do_getattr(qri->inode, NULL,
					    CEPH_STAT_CAP_INODE, true);
		if (ret >= 0)
			in = qri->inode;
		else
			in = ERR_PTR(ret);
	}  else {
		in = ceph_lookup_inode(sb, realm->ino);
	}

	if (IS_ERR(in)) {
		doutc(cl, "Can't lookup inode %llx (err: %ld)\n", realm->ino,
		      PTR_ERR(in));
		/* remember the failure so lookups are not retried for 60 seconds */
		qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
	} else {
		/* success: clear any pending timeout and cache the inode */
		qri->timeout = 0;
		qri->inode = in;
	}
	mutex_unlock(&qri->mutex);

	return in;
}

/*
 * ceph_cleanup_quotarealms_inodes - free every tracked quota realm inode entry
 * @mdsc: MDS client whose quotarealms_inodes tree is emptied
 *
 * Each entry is erased from the tree, the inode it caches (if any) is
 * released with iput(), and the entry itself is freed.
 */
void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
{
	struct ceph_quotarealm_inode *qri;
	struct rb_node *node;

	/*
	 * It should now be safe to clean quotarealms_inode tree without holding
	 * mdsc->quotarealms_inodes_mutex...
	 *
	 * NOTE(review): the code below does take the mutex anyway; the remark
	 * above presumably means the locking is only precautionary - confirm.
	 */
	mutex_lock(&mdsc->quotarealms_inodes_mutex);
	while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
		node = rb_first(&mdsc->quotarealms_inodes);
		qri = rb_entry(node, struct ceph_quotarealm_inode, node);
		rb_erase(node, &mdsc->quotarealms_inodes);
		iput(qri->inode);
		kfree(qri);
	}
	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
}

/*
 * This function walks through the snaprealm for an inode and returns the
 * ceph_snap_realm for the first snaprealm that has quotas set (max_files,
 * max_bytes, or any, depending on the 'which_quota' argument).  If the root is
 * reached, return the root ceph_snap_realm instead.
 *
 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 * returned realm.
 *
 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 * will be restarted.
 *
 * NULL is returned for snapshot inodes, when the inode has no snaprealm, or
 * when a realm's inode can be neither grabbed nor looked up.
 */
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
					       struct inode *inode,
					       enum quota_get_realm which_quota,
					       bool retry)
{
	struct ceph_client *cl = mdsc->fsc->client;
	struct ceph_inode_info *ci = NULL;
	struct ceph_snap_realm *realm, *next;
	struct inode *in;
	bool has_quota;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return NULL;

restart:
	realm = ceph_inode(inode)->i_snap_realm;
	if (realm)
		ceph_get_snap_realm(mdsc, realm);
	else
		pr_err_ratelimited_client(cl,
				"%p %llx.%llx null i_snap_realm\n",
				inode, ceph_vinop(inode));

	while (realm) {
		bool has_inode;

		spin_lock(&realm->inodes_with_caps_lock);
		has_inode = realm->inode;
		in = has_inode ? igrab(realm->inode) : NULL;
		spin_unlock(&realm->inodes_with_caps_lock);
		/* the realm has an inode but igrab() could not take a reference */
		if (has_inode && !in)
			break;
		if (!in) {
			/* the lookup may block, so snap_rwsem is dropped around it */
			up_read(&mdsc->snap_rwsem);
			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
			down_read(&mdsc->snap_rwsem);
			if (IS_ERR_OR_NULL(in))
				break;
			/* the rwsem was dropped: bail out or redo the whole walk */
			ceph_put_snap_realm(mdsc, realm);
			if (!retry)
				return ERR_PTR(-EAGAIN);
			goto restart;
		}

		ci = ceph_inode(in);
		has_quota = __ceph_has_quota(ci, which_quota);
		iput(in);

		next = realm->parent;
		/* the reference on realm is handed over to the caller */
		if (has_quota || !next)
		       return realm;

		/* take the parent's reference before dropping the child's */
		ceph_get_snap_realm(mdsc, next);
		ceph_put_snap_realm(mdsc, realm);
		realm = next;
	}
	if (realm)
		ceph_put_snap_realm(mdsc, realm);

	return NULL;
}

/*
 * ceph_quota_is_same_realm - check whether two inodes share a quota realm
 * @old: first inode
 * @new: second inode
 *
 * Returns true when get_quota_realm() yields the same realm (or NULL) for
 * both inodes with QUOTA_GET_ANY.
 */
bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
	struct ceph_snap_realm *old_realm, *new_realm;
	bool is_same;

restart:
	/*
	 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
	 * However, get_quota_realm may drop it temporarily.  By setting the
	 * 'retry' parameter to 'false' for the second lookup, we'll get
	 * -EAGAIN if the rwsem was dropped and we can then restart the whole
	 * operation.
	 */
	down_read(&mdsc->snap_rwsem);
	old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true);
	new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false);
	if (PTR_ERR(new_realm) == -EAGAIN) {
		up_read(&mdsc->snap_rwsem);
		if (old_realm)
			ceph_put_snap_realm(mdsc, old_realm);
		goto restart;
	}
	is_same = (old_realm == new_realm);
	up_read(&mdsc->snap_rwsem);

	if (old_realm)
		ceph_put_snap_realm(mdsc, old_realm);
	if (new_realm)
		ceph_put_snap_realm(mdsc, new_realm);

	return is_same;
}

enum quota_check_op {
	QUOTA_CHECK_MAX_FILES_OP,	/* check quota max_files limit */
	QUOTA_CHECK_MAX_BYTES_OP,	/* check quota max_bytes limit */
	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP	/* check if quota max_bytes
						   limit is approaching */
};

/*
 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 * realm, it will execute quota check operation defined by the 'op' parameter.
 * The snaprealm walk is interrupted if the quota check detects that the quota
 * is exceeded or if the root inode is reached.
 *
 * 'delta' is the amount about to be added: a number of files for
 * QUOTA_CHECK_MAX_FILES_OP, a number of bytes for the two max_bytes ops.
 * A limit of 0 in a realm means that realm imposes no limit.
 */
static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
				 loff_t delta)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
	struct ceph_client *cl = mdsc->fsc->client;
	struct ceph_inode_info *ci;
	struct ceph_snap_realm *realm, *next;
	struct inode *in;
	u64 max, rvalue;
	bool exceeded = false;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return false;

	down_read(&mdsc->snap_rwsem);
restart:
	realm = ceph_inode(inode)->i_snap_realm;
	if (realm)
		ceph_get_snap_realm(mdsc, realm);
	else
		pr_err_ratelimited_client(cl,
				"%p %llx.%llx null i_snap_realm\n",
				inode, ceph_vinop(inode));

	while (realm) {
		bool has_inode;

		spin_lock(&realm->inodes_with_caps_lock);
		has_inode = realm->inode;
		in = has_inode ? igrab(realm->inode) : NULL;
		spin_unlock(&realm->inodes_with_caps_lock);
		/* the realm has an inode but igrab() could not take a reference */
		if (has_inode && !in)
			break;
		if (!in) {
			/* the lookup may block, so snap_rwsem is dropped around it */
			up_read(&mdsc->snap_rwsem);
			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
			down_read(&mdsc->snap_rwsem);
			if (IS_ERR_OR_NULL(in))
				break;
			/* the rwsem was dropped: redo the whole walk */
			ceph_put_snap_realm(mdsc, realm);
			goto restart;
		}
		ci = ceph_inode(in);
		/* snapshot the limit and current usage under i_ceph_lock */
		spin_lock(&ci->i_ceph_lock);
		if (op == QUOTA_CHECK_MAX_FILES_OP) {
			max = ci->i_max_files;
			rvalue = ci->i_rfiles + ci->i_rsubdirs;
		} else {
			max = ci->i_max_bytes;
			rvalue = ci->i_rbytes;
		}
		spin_unlock(&ci->i_ceph_lock);
		switch (op) {
		case QUOTA_CHECK_MAX_FILES_OP:
		case QUOTA_CHECK_MAX_BYTES_OP:
			exceeded = (max && (rvalue + delta > max));
			break;
		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
			if (max) {
				if (rvalue >= max)
					exceeded = true;
				else {
					/*
					 * when we're writing more than 1/16th
					 * of the available space
					 */
					exceeded =
						(((max - rvalue) >> 4) < delta);
				}
			}
			break;
		default:
			/* Shouldn't happen */
			pr_warn_client(cl, "Invalid quota check op (%d)\n", op);
			exceeded = true; /* Just break the loop */
		}
		iput(in);

		next = realm->parent;
		if (exceeded || !next)
			break;
		/* take the parent's reference before dropping the child's */
		ceph_get_snap_realm(mdsc, next);
		ceph_put_snap_realm(mdsc, realm);
		realm = next;
	}
	if (realm)
		ceph_put_snap_realm(mdsc, realm);
	up_read(&mdsc->snap_rwsem);

	return exceeded;
}

/*
 * ceph_quota_is_max_files_exceeded - check if we can create a new file
 * @inode:	directory where a new file is being created
 *
 * This function returns true if creating a new file would exceed a max_files
 * quota; it returns false otherwise.  It is necessary to walk through the
 * snaprealm hierarchy (until the FS root) to check all realms with quotas set.
*/ bool ceph_quota_is_max_files_exceeded(struct inode *inode) { if (!ceph_has_realms_with_quotas(inode)) return false; WARN_ON(!S_ISDIR(inode->i_mode)); return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1); } /* * ceph_quota_is_max_bytes_exceeded - check if we can write to a file * @inode: inode being written * @newsize: new size if write succeeds * * This functions returns true is max_bytes quota allows a file size to reach * @newsize; it returns false otherwise. */ bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize) { loff_t size = i_size_read(inode); if (!ceph_has_realms_with_quotas(inode)) return false; /* return immediately if we're decreasing file size */ if (newsize <= size) return false; return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size)); } /* * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes * @inode: inode being written * @newsize: new size if write succeeds * * This function returns true if the new file size @newsize will be consuming * more than 1/16th of the available quota space; it returns false otherwise. */ bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize) { loff_t size = ceph_inode(inode)->i_reported_size; if (!ceph_has_realms_with_quotas(inode)) return false; /* return immediately if we're decreasing file size */ if (newsize <= size) return false; return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP, (newsize - size)); } /* * ceph_quota_update_statfs - if root has quota update statfs with quota status * @fsc: filesystem client instance * @buf: statfs to update * * If the mounted filesystem root has max_bytes quota set, update the filesystem * statistics with the quota status. * * This function returns true if the stats have been updated, false otherwise. 
*/ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) { struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_inode_info *ci; struct ceph_snap_realm *realm; struct inode *in; /* total doubles as the "quota found" flag: it stays 0 unless i_max_bytes is set */ u64 total = 0, used, free; bool is_updated = false; down_read(&mdsc->snap_rwsem); realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES, true); up_read(&mdsc->snap_rwsem); /* no realm returned for the root: leave @buf untouched */ if (!realm) return false; /* grab the realm inode under inodes_with_caps_lock; released by iput() below */ spin_lock(&realm->inodes_with_caps_lock); in = realm->inode ? igrab(realm->inode) : NULL; spin_unlock(&realm->inodes_with_caps_lock); if (in) { ci = ceph_inode(in); spin_lock(&ci->i_ceph_lock); if (ci->i_max_bytes) { total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT; used = ci->i_rbytes >> CEPH_BLOCK_SHIFT; /* For quota size less than 4MB, use 4KB block size */ if (!total) { total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT; used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT; buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT; } /* It is possible for a quota to be exceeded. * Report 'zero' in that case */ free = total > used ? total - used : 0; /* For quota size less than 4KB, report the * total=used=4KB,free=0 when quota is full * and total=free=4KB, used=0 otherwise */ if (!total) { total = 1; free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0; buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT; } } spin_unlock(&ci->i_ceph_lock); /* total is non-zero only if the i_max_bytes branch ran, so free has been assigned whenever it is read here */ if (total) { buf->f_blocks = total; buf->f_bfree = free; buf->f_bavail = free; is_updated = true; } iput(in); } /* release the realm obtained from get_quota_realm() */ ceph_put_snap_realm(mdsc, realm); return is_updated; } |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | // SPDX-License-Identifier: GPL-2.0-only /* * IPv6 raw table, a port of the IPv4 raw table to IPv6 * * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) /* Module parameter (permissions 0000): when set, packet_raw_before_defrag is used instead of packet_raw */ static bool raw_before_defrag __read_mostly; MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); module_param(raw_before_defrag, bool, 0000); /* Default "raw" table template, at priority NF_IP6_PRI_RAW */ static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW, }; /* Same template as packet_raw except for the priority, NF_IP6_PRI_RAW_BEFORE_DEFRAG */ static const struct xt_table packet_raw_before_defrag = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, }; /* Hook ops allocated in ip6table_raw_init() and freed in ip6table_raw_fini() */ static struct nf_hook_ops *rawtable_ops __read_mostly; /* Callback handed to xt_register_template(): allocates the initial table blob for the selected template, registers the table in @net and frees the blob whether or not registration succeeded. Returns -ENOMEM if the blob cannot be allocated, otherwise the ip6t_register_table() result. */ static int ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; const struct xt_table *table = &packet_raw; int ret; if (raw_before_defrag) table = &packet_raw_before_defrag; repl = ip6t_alloc_initial_table(table); if (repl == NULL) return -ENOMEM; ret = ip6t_register_table(net, table, repl, rawtable_ops); kfree(repl); return ret; } static void __net_exit ip6table_raw_net_pre_exit(struct net *net) { ip6t_unregister_table_pre_exit(net, "raw"); } static void __net_exit ip6table_raw_net_exit(struct net *net) { ip6t_unregister_table_exit(net, "raw"); } /* Per-netns teardown only; there is no .init callback here, table creation goes through ip6table_raw_table_init() */ static struct pernet_operations ip6table_raw_net_ops = { .pre_exit = ip6table_raw_net_pre_exit, .exit = ip6table_raw_net_exit, }; /* Module init: registers the template, allocates the hook ops, then registers the pernet ops; each failure path undoes the steps already completed. */ static int 
__init ip6table_raw_init(void) { const struct xt_table *table = &packet_raw; int ret; if (raw_before_defrag) { table = &packet_raw_before_defrag; pr_info("Enabling raw table before defrag\n"); } ret = xt_register_template(table, ip6table_raw_table_init); if (ret < 0) return ret; /* Allocate the hook ops that ip6table_raw_table_init() passes to ip6t_register_table() */ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); if (IS_ERR(rawtable_ops)) { /* allocation failed: undo the template registration */ xt_unregister_template(table); return PTR_ERR(rawtable_ops); } ret = register_pernet_subsys(&ip6table_raw_net_ops); if (ret < 0) { /* pernet registration failed: free the hook ops and undo the template registration */ kfree(rawtable_ops); xt_unregister_template(table); return ret; } return ret; } /* Module exit: unregisters the pernet ops and the template, then frees the hook ops. NOTE(review): &packet_raw is passed even when raw_before_defrag selected packet_raw_before_defrag; both templates share .name and .af -- confirm xt_unregister_template() matches on those rather than on the pointer. */ static void __exit ip6table_raw_fini(void) { unregister_pernet_subsys(&ip6table_raw_net_ops); xt_unregister_template(&packet_raw); kfree(rawtable_ops); } module_init(ip6table_raw_init); module_exit(ip6table_raw_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Ip6tables legacy raw table"); |
55040 15864 55028 10176 10176 8460 8456 8460 25 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 | /* SPDX-License-Identifier: GPL-2.0+ */ /* * Read-Copy Update mechanism for mutual exclusion * * Copyright IBM Corporation, 2001 * * Author: Dipankar Sarma <dipankar@in.ibm.com> * * Based on the original work by Paul McKenney <paulmck@vnet.ibm.com> * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) * * For detailed explanation of Read-Copy Update mechanism see - * http://lse.sourceforge.net/locking/rcupdate.html * */ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H #include <linux/types.h> #include <linux/compiler.h> #include <linux/atomic.h> #include <linux/irqflags.h> #include <linux/preempt.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> #include <linux/cleanup.h> #include <asm/processor.h> #include <linux/cpumask.h> #include <linux/context_tracking_irq.h> /* * Comparisons based on the difference (a) - (b): the _GE forms are true when * that difference is at most half of the type's maximum, the _LT forms when it * is larger. NOTE(review): presumably intended for counters that may wrap -- * confirm against callers. */ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) /* Reads the storage of lvalue (a) through a long pointer; (a) must be addressable. */ #define ulong2long(a) (*(long *)(&(a))) /* As above, with the difference truncated to unsigned short before comparing. */ #define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) #define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); struct rcu_gp_oldstate; unsigned long get_completed_synchronize_rcu(void); void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); // Maximum number of unsigned long values corresponding to // not-yet-completed RCU grace periods. 
#define NUM_ACTIVE_RCU_POLL_OLDSTATE 2 /** * same_state_synchronize_rcu - Are two old-state values identical? * @oldstate1: First old-state value. * @oldstate2: Second old-state value. * * The two old-state values must have been obtained from either * get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or * get_completed_synchronize_rcu(). Returns @true if the two values are * identical and @false otherwise. This allows structures whose lifetimes * are tracked by old-state values to push these values to a list header, * allowing those structures to be slightly smaller. */ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned long oldstate2) { return oldstate1 == oldstate2; } #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); void __rcu_read_unlock(void); /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. This gives the rcu_read_lock() * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 
*/ #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting) #else /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TINY_RCU #define rcu_read_unlock_strict() do { } while (0) #else void rcu_read_unlock_strict(void); #endif static inline void __rcu_read_lock(void) { preempt_disable(); } static inline void __rcu_read_unlock(void) { preempt_enable(); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) { return 0; } #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_LAZY void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); #else static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) { call_rcu(head, func); } #endif /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); #else static inline void rcu_init_tasks_generic(void) { } #endif #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); #else /* #ifdef CONFIG_RCU_STALL_COMMON */ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static inline void rcu_irq_work_resched(void) { } #endif #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); int rcu_nocb_cpu_deoffload(int cpu); void rcu_nocb_flush_deferred_wakeup(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static inline void rcu_init_nohz(void) { } static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; } static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; } static inline void rcu_nocb_flush_deferred_wakeup(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* * Note a quasi-voluntary context switch for 
RCU-tasks's benefit. * This is a macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU_GENERIC # ifdef CONFIG_TASKS_RCU # define rcu_tasks_classic_qs(t, preempt) \ do { \ if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu # define synchronize_rcu_tasks synchronize_rcu # endif # ifdef CONFIG_TASKS_TRACE_RCU // Bits for ->trc_reader_special.b.need_qs field. #define TRC_NEED_QS 0x1 // Task needs a quiescent state. #define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); void rcu_tasks_trace_qs_blkd(struct task_struct *t); # define rcu_tasks_trace_qs(t) \ do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ likely(!___rttq_nesting)) { \ rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ } \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif #define rcu_tasks_qs(t, preempt) \ do { \ rcu_tasks_classic_qs((t), (preempt)); \ rcu_tasks_trace_qs(t); \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) #define rcu_tasks_qs(t, preempt) do { } while (0) #define 
rcu_note_voluntary_context_switch(t) do { } while (0) #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? * * As an accident of implementation, an RCU Tasks Trace grace period also * acts as an RCU grace period. However, this could change at any time. * Code relying on this accident must call this function to verify that * this accident is still happening. * * You have been warned! */ static inline bool rcu_trace_implies_rcu_gp(void) { return true; } /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() * machinery were to be shut off, as some advocate for PREEMPTION kernels. */ #define cond_resched_tasks_rcu_qs() \ do { \ rcu_tasks_qs(current, false); \ cond_resched(); \ } while (0) /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ #if defined(CONFIG_TREE_RCU) #include <linux/rcutree.h> #elif defined(CONFIG_TINY_RCU) #include <linux/rcutiny.h> #else #error "Unknown RCU implementation specified to kernel configuration" #endif /* * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls * are needed for dynamic initialization and destruction of rcu_head * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for * dynamic initialization and destruction of statically allocated rcu_head * structures. However, rcu_head structures allocated dynamically in the * heap don't need any initialization. 
*/ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head); void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ static inline void init_rcu_head(struct rcu_head *head) { } static inline void destroy_rcu_head(struct rcu_head *head) { } static inline void init_rcu_head_on_stack(struct rcu_head *head) { } static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ extern struct lockdep_map rcu_lock_map; extern struct lockdep_map rcu_bh_lock_map; extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); } int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); int rcu_read_lock_sched_held(void); int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) { return 1; } static inline int rcu_read_lock_bh_held(void) { return 1; } static inline int rcu_read_lock_sched_held(void) { return !preemptible(); } static inline int rcu_read_lock_any_held(void) { return !preemptible(); } static inline int debug_lockdep_rcu_enabled(void) { return 0; } #endif /* #else #ifdef 
CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check * @s: informative message * * This checks debug_lockdep_rcu_enabled() before checking (c) to * prevent early boot splats due to lockdep not yet being initialized, * and rechecks it after checking (c) to prevent false-positive splats * due to races with lockdep being disabled. See commit 3066820034b5dd * ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail. * * The static __warned flag limits each expansion site to a single report. */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ static bool __section(".data.unlikely") __warned; \ if (debug_lockdep_rcu_enabled() && (c) && \ debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) static inline void rcu_preempt_sleep_check(void) { RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), "Illegal context switch in RCU read-side critical section"); } #else /* #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) */ static inline void rcu_preempt_sleep_check(void) { } #endif /* #else #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) */ /* * Warn via RCU_LOCKDEP_WARN() if rcu_lock_map (see rcu_preempt_sleep_check()), * rcu_bh_lock_map (skipped when CONFIG_PREEMPT_RT is enabled) or * rcu_sched_lock_map is held. */ #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ /* No-op variants: (c) still appears in the expansion but is never evaluated because of the leading 0 && */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* * Helper functions for rcu_dereference_check(), rcu_dereference_protected() * and rcu_assign_pointer(). 
Some of these could be folded into their * callers, but they are left separate in order to ease introduction of * multiple pointers markings to match different RCU implementations * (e.g., __srcu), should this make sense in the future. */ #ifdef __CHECKER__ #define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ #define __unrcu_pointer(p, local) \ ({ \ typeof(*p) *local = (typeof(*p) *__force)(p); \ rcu_check_sparse(p, __rcu); \ ((typeof(*p) __force __kernel *)(local)); \ }) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property * * Converts @p from an __rcu pointer to a __kernel pointer. * This allows an __rcu pointer to be used with xchg() and friends. */ #define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu)) #define __rcu_access_pointer(p, local, space) \ ({ \ typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(local)); \ }) #define __rcu_dereference_check(p, local, c, space) \ ({ \ /* Dependency order vs. p above. */ \ typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(local)); \ }) #define __rcu_dereference_protected(p, local, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) #define __rcu_dereference_raw(p, local) \ ({ \ /* Dependency order vs. p above. */ \ typeof(p) local = READ_ONCE(p); \ ((typeof(*p) __force __kernel *)(local)); \ }) #define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu)) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. 
*/ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from * reordering the code that initializes the structure after the pointer * assignment. More importantly, this call documents which pointers * will be dereferenced by RCU read-side code. * * In some special cases, you may use RCU_INIT_POINTER() instead * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due * to the fact that it does not constrain either the CPU or the compiler. * That said, using RCU_INIT_POINTER() when you should have used * rcu_assign_pointer() is a very bad thing that results in * impossible-to-diagnose memory corruption. So please be careful. * See the RCU_INIT_POINTER() comment header for details. * * Note that rcu_assign_pointer() evaluates each of its arguments only * once, appearances notwithstanding. One of the "extra" evaluations * is in typeof() and the other visible only to sparse (__CHECKER__), * neither of which actually execute the argument. As with most cpp * macros, this execute-arguments-only-once property is important, so * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. 
*/ #define rcu_assign_pointer(p, v) \ do { \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ } while (0) /** * rcu_replace_pointer() - replace an RCU pointer, returning its old value * @rcu_ptr: RCU pointer, whose old value is returned * @ptr: regular pointer * @c: the lockdep conditions under which the dereference will take place * * Perform a replacement, where @rcu_ptr is an RCU-annotated * pointer and @c is the lockdep argument that is passed to the * rcu_dereference_protected() call used to read that pointer. The old * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. */ #define rcu_replace_pointer(rcu_ptr, ptr, c) \ ({ \ typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ __tmp; \ }) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the * lockdep checks for being in an RCU read-side critical section. This is * useful when the value of this pointer is accessed, but the pointer is * not dereferenced, for example, when testing an RCU-protected pointer * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. * Within an RCU read-side critical section, there is little reason to * use rcu_access_pointer(). * * It is usually best to test the rcu_access_pointer() return value * directly in order to avoid accidental dereferences being introduced * by later inattentive changes. In other words, assigning the * rcu_access_pointer() return value to a local variable results in an * accident waiting to happen. 
* * It is also permissible to use rcu_access_pointer() when read-side * access to the pointer was removed at least one grace period ago, as is * the case in the context of the RCU callback that is freeing up the data, * or after a synchronize_rcu() returns. This can be useful when tearing * down multi-linked structures after a grace period has elapsed. However, * rcu_dereference_protected() is normally preferred for this use case. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the * dereference will take place are correct. Typically the conditions * indicate the various locking conditions that should be held at that * point. The check should return true if the conditions are satisfied. * An implicit check for being in an RCU read-side critical section * (rcu_read_lock()) is included. * * For example: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); * * Inserts memory barriers on architectures that require them * (currently only the Alpha), prevents the compiler from refetching * (and from merging fetches), and, more importantly, documents exactly * which pointers are protected by RCU and checks that the pointer is * annotated as __rcu. 
*/ #define rcu_dereference_check(p, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-bh counterpart to rcu_dereference_check(). However, * please note that starting in v5.0 kernels, vanilla RCU grace periods * wait for local_bh_disable() regions of code in addition to regions of * code demarked by rcu_read_lock() and rcu_read_unlock(). This means * that synchronize_rcu(), call_rcu, and friends all take not only * rcu_read_lock() but also rcu_read_lock_bh() into account. */ #define rcu_dereference_bh_check(p, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-sched counterpart to rcu_dereference_check(). * However, please note that starting in v5.0 kernels, vanilla RCU grace * periods wait for preempt_disable() regions of code in addition to * regions of code demarked by rcu_read_lock() and rcu_read_unlock(). * This means that synchronize_rcu(), call_rcu, and friends all take not * only rcu_read_lock() but also rcu_read_lock_sched() into account. */ #define rcu_dereference_sched_check(p, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || rcu_read_lock_sched_held(), \ __rcu) /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). 
*/ #define rcu_dereference_raw_check(p) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(). This is useful in cases where update-side locks * prevent the value of the pointer from changing. Please note that this * primitive does *not* prevent the compiler from repeating this reference * or combining it with other references, so it should not be used without * protection of appropriate locks. * * This function is only for update-side use. Using this function * when protected only by rcu_read_lock() will result in infrequent * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ __rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu) /** * rcu_dereference() - fetch RCU-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * This is a simple wrapper around rcu_dereference_check(), passing 0 as * the condition. */ #define rcu_dereference(p) rcu_dereference_check(p, 0) /** * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_bh_check() do the dirty work. */ #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) /** * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_sched_check() do the dirty work. */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off * * This is simply an identity function, but it documents where a pointer * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. 
In C11, it would map to * kill_dependency(). It could be used as follows:: * * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); * if (long_lived) { * if (!atomic_inc_not_zero(p->refcnt)) * long_lived = false; * else * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); */ #define rcu_pointer_handoff(p) (p) /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the * synchronize_rcu() is guaranteed to block until after all the other * CPUs exit their critical sections. Similarly, if call_rcu() is invoked * on one CPU while other CPUs are within RCU read-side critical * sections, invocation of the corresponding RCU callback is deferred * until after the all the other CPUs exit their critical sections. * * In v5.0 and later kernels, synchronize_rcu() and call_rcu() also * wait for regions of code with preemption disabled, including regions of * code with interrupts or softirqs disabled. In pre-v5.0 kernels, which * define synchronize_sched(), only code enclosed within rcu_read_lock() * and rcu_read_unlock() are guaranteed to be waited for. * * Note, however, that RCU callbacks are permitted to run concurrently * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU * callback is invoked. This is legal, because the RCU read-side critical * section that was running concurrently with the call_rcu() (and which * therefore might be referencing something that the corresponding RCU * callback would free up) has completed before the corresponding * RCU callback is invoked. * * RCU read-side critical sections may be nested. 
Any deferred actions * will be deferred until the outermost RCU read-side critical section * completes. * * You can avoid reading and understanding the next paragraph by * following this rule: don't put anything in an rcu_read_lock() RCU * read-side critical section that would block in a !PREEMPTION kernel. * But if you want the full story, read on! * * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, * but explicit blocking is illegal. Finally, in preemptible RCU * implementations in real-time (with -rt patchset) kernel builds, RCU * read-side critical sections may be preempted and they may also block, but * only when acquiring spinlocks that are subject to priority inheritance. */ static __always_inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock() used illegally while idle"); } /* * So where is rcu_write_lock()? It does not exist, as there is no * way for writers to lock out RCU readers. This is a feature, not * a bug -- this property is what provides RCU's performance benefits. * Of course, writers must coordinate with each other. The normal * spinlock primitives work well for this, but any other technique may be * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ /** * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * In almost all situations, rcu_read_unlock() is immune from deadlock. 
* In recent kernels that have consolidated synchronize_sched() and * synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity * also extends to the scheduler's runqueue and priority-inheritance * spinlocks, courtesy of the quiescent-state deferral that is carried * out when rcu_read_unlock() is invoked with interrupts disabled. * * See rcu_read_lock() for more information. */ static inline void rcu_read_unlock(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); __release(RCU); __rcu_read_unlock(); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent to rcu_read_lock(), but also disables softirqs. * Note that anything else that disables softirqs can also serve as an RCU * read-side critical section. However, please note that this equivalence * applies only to v5.0 and later. Before v5.0, rcu_read_lock() and * rcu_read_lock_bh() were unrelated. * * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() * was invoked from some other task. */ static inline void rcu_read_lock_bh(void) { local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); } /** * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ static inline void rcu_read_unlock_bh(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); } /** * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * * This is equivalent to rcu_read_lock(), but also disables preemption. 
* Read-side critical sections can also be introduced by anything else that * disables preemption, including local_irq_disable() and friends. However, * please note that the equivalence to rcu_read_lock() applies only to * v5.0 and later. Before v5.0, rcu_read_lock() and rcu_read_lock_sched() * were unrelated. * * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_sched() from process context if the matching * rcu_read_lock_sched() was invoked from an NMI handler. */ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); } /** * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) { __release(RCU_SCHED); preempt_enable_notrace(); } /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. 
The caller has taken whatever steps are required to prevent * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to * readers either at compile time or via rcu_assign_pointer() *and* * * a. You have not made *any* reader-visible changes to * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to * other state where exact synchronization is not required.) * * Failure to follow these rules governing use of RCU_INIT_POINTER() will * result in impossible-to-diagnose memory corruption. As in the structures * will look OK in crash dumps, but any concurrent RCU readers might * see pre-initialized values of the referenced data structure. So * please be very careful how you use RCU_INIT_POINTER()!!! * * If you are creating an RCU-protected linked structure that is accessed * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no * ordering guarantees for either the CPU or the compiler. */ #define RCU_INIT_POINTER(p, v) \ do { \ rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. 
*/ #define RCU_POINTER_INITIALIZER(p, v) \ .p = RCU_INITIALIZER(v) /* * Does the specified offset indicate that the corresponding rcu_head * structure can be handled by kvfree_rcu()? */ #define __is_kvfree_rcu_offset(offset) ((offset) < 4096) /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree for double-argument invocations. * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore * when they are used in a kernel module, that module must invoke the * high-latency rcu_barrier() function at module-unload time. * * The kfree_rcu() function handles this issue. Rather than encoding a * function address in the embedded rcu_head structure, kfree_rcu() instead * encodes the offset of the rcu_head structure within the base structure. * Because the functions are not allowed in the low-order 4096 bytes of * kernel virtual memory, offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will * be generated in kvfree_rcu_arg_2(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to * position the rcu_head structure into the first 4096 bytes. * * The object to be freed can be allocated either by kmalloc() or * kmem_cache_alloc(). * * Note that the allowable offset might decrease in the future. * * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ #define kfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf) #define kvfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf) /** * kfree_rcu_mightsleep() - kfree an object after a grace period. * @ptr: pointer to kfree for single-argument invocations. * * When it comes to head-less variant, only one argument * is passed and that is just a pointer which has to be * freed after a grace period. 
Therefore the semantic is * * kfree_rcu_mightsleep(ptr); * * where @ptr is the pointer to be freed by kvfree(). * * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() * annotation. Otherwise, please switch and embed the * rcu_head structure within the type of @ptr. */ #define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ \ if (___p) { \ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \ kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ } \ } while (0) #define kvfree_rcu_arg_1(ptr) \ do { \ typeof(ptr) ___p = (ptr); \ \ if (___p) \ kvfree_call_rcu(NULL, (void *) (___p)); \ } while (0) /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies * if the UNLOCK and LOCK are executed by the same CPU or if the * UNLOCK and LOCK operate on the same lock variable. */ #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #define smp_mb__after_unlock_lock() do { } while (0) #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ /* Has the specified rcu_head structure been handed to call_rcu()? */ /** * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() * @rhp: The rcu_head structure to initialize. * * If you intend to invoke rcu_head_after_call_rcu() to test whether a * given rcu_head structure has already been passed to call_rcu(), then * you must also invoke this rcu_head_init() function on it just after * allocating that structure. Calls to this function must not race with * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation. 
*/ static inline void rcu_head_init(struct rcu_head *rhp) { rhp->func = (rcu_callback_t)~0L; } /** * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. * @f: The function passed to call_rcu() along with @rhp. * * Returns @true if the @rhp has been passed to call_rcu() with @func, * and @false otherwise. Emits a warning in any other case, including * the case where @rhp has already been invoked after a grace period. * Calls to this function must not race with callback invocation. One way * to avoid such races is to enclose the call to rcu_head_after_call_rcu() * in an RCU read-side critical section that includes a read-side fetch * of the pointer to the structure containing @rhp. */ static inline bool rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) { rcu_callback_t func = READ_ONCE(rhp->func); if (func == f) return true; WARN_ON_ONCE(func != (rcu_callback_t)~0L); return false; } /* kernel/ksysfs.c definitions */ extern int rcu_expedited; extern int rcu_normal; DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) #endif /* __LINUX_RCUPDATE_H */ |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 | // SPDX-License-Identifier: GPL-2.0-or-later /* XTS: as defined in IEEE1619/D16 * http://grouper.ieee.org/groups/1619/email/pdf00086.pdf * * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org> * 
* Based on ecb.c
 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 */
#include <crypto/internal/cipher.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

#include <crypto/xts.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>

/* Per-transform state: the data cipher (Key1) and the tweak cipher (Key2). */
struct xts_tfm_ctx {
	struct crypto_skcipher *child;
	struct crypto_cipher *tweak;
};

/* Per-instance state: spawns from which the two ciphers above are created. */
struct xts_instance_ctx {
	struct crypto_skcipher_spawn spawn;
	struct crypto_cipher_spawn tweak_spawn;
};

/*
 * Per-request state.
 * @t:      current tweak value; seeded in xts_init_crypt()
 * @tail:   scatterlist positioned by xts_cts_final() at the last full block
 * @sg:     backing storage handed to scatterwalk_ffwd() for @tail
 * @subreq: request issued to the child skcipher
 */
struct xts_request_ctx {
	le128 t;
	struct scatterlist *tail;
	struct scatterlist sg[2];
	struct skcipher_request subreq;
};

/*
 * Validate the combined key with xts_verify_key(), then split it in half:
 * the second half keys the tweak cipher, the first half keys the data cipher.
 * The parent's request flags are propagated to both before keying.
 * Returns 0 or the first error reported by a callee.
 */
static int xts_setkey(struct crypto_skcipher *parent, const u8 *key,
		      unsigned int keylen)
{
	struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(parent);
	struct crypto_skcipher *child;
	struct crypto_cipher *tweak;
	int err;

	err = xts_verify_key(parent, key, keylen);
	if (err)
		return err;

	keylen /= 2;

	/* we need two cipher instances: one to compute the initial 'tweak'
	 * by encrypting the IV (usually the 'plain' iv) and the other
	 * one to encrypt and decrypt the data */

	/* tweak cipher, uses Key2 i.e. the second half of *key */
	tweak = ctx->tweak;
	crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK);
	crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) &
				       CRYPTO_TFM_REQ_MASK);
	err = crypto_cipher_setkey(tweak, key + keylen, keylen);
	if (err)
		return err;

	/* data cipher, uses Key1 i.e. the first half of *key */
	child = ctx->child;
	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
					 CRYPTO_TFM_REQ_MASK);
	return crypto_skcipher_setkey(child, key, keylen);
}

/*
 * We compute the tweak masks twice (both before and after the ECB encryption or
 * decryption) to avoid having to allocate a temporary buffer and/or make
 * multiple calls to the 'ecb(..)' instance, which usually would be slower than
 * just doing the gf128mul_x_ble() calls again.
 *
 * @second_pass selects the post-cipher pass, which walks the subrequest
 * instead of the caller's request. @enc distinguishes encryption from
 * decryption, which matters only for the ciphertext-stealing case below.
 */
static int xts_xor_tweak(struct skcipher_request *req, bool second_pass,
			 bool enc)
{
	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
	const int bs = XTS_BLOCK_SIZE;
	struct skcipher_walk w;
	le128 t = rctx->t;
	int err;

	if (second_pass) {
		req = &rctx->subreq;
		/* set to our TFM to enforce correct alignment: */
		skcipher_request_set_tfm(req, tfm);
	}
	err = skcipher_walk_virt(&w, req, false);

	while (w.nbytes) {
		unsigned int avail = w.nbytes;
		le128 *wsrc;
		le128 *wdst;

		wsrc = w.src.virt.addr;
		wdst = w.dst.virt.addr;

		do {
			/*
			 * Ciphertext stealing: this branch is taken once fewer
			 * than two blocks remain in the walk. For decryption
			 * the tweak is advanced before the XOR; rctx->t is
			 * updated on the second pass only, with the value that
			 * xts_cts_final() subsequently uses.
			 */
			if (unlikely(cts) &&
			    w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) {
				if (!enc) {
					if (second_pass)
						rctx->t = t;
					gf128mul_x_ble(&t, &t);
				}
				le128_xor(wdst, &t, wsrc);
				if (enc && second_pass)
					gf128mul_x_ble(&rctx->t, &t);
				skcipher_walk_done(&w, avail - bs);
				return 0;
			}

			le128_xor(wdst++, &t, wsrc++);
			gf128mul_x_ble(&t, &t);
		} while ((avail -= bs) >= bs);

		err = skcipher_walk_done(&w, avail);
	}

	return err;
}

/* First tweak pass, run on the caller's request before the child cipher. */
static int xts_xor_tweak_pre(struct skcipher_request *req, bool enc)
{
	return xts_xor_tweak(req, false, enc);
}

/* Second tweak pass, run on the subrequest after the child cipher. */
static int xts_xor_tweak_post(struct skcipher_request *req, bool enc)
{
	return xts_xor_tweak(req, true, enc);
}

/*
 * Completion callback for the single-block subrequest issued by
 * xts_cts_final(): on success, XOR the block at rctx->tail with the saved
 * tweak in place, then complete the original request with @err.
 */
static void xts_cts_done(void *data, int err)
{
	struct skcipher_request *req = data;
	le128 b;

	if (!err) {
		struct xts_request_ctx *rctx = skcipher_request_ctx(req);

		scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
		le128_xor(&b, &rctx->t, &b);
		scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 1);
	}

	skcipher_request_complete(req, err);
}

/*
 * Handle the trailing partial block (ciphertext stealing).
 *
 * Reads the last full output block, keeps a copy in b[1], overlays the
 * first @tail bytes of b[0] with the remaining source bytes, XORs b[0] with
 * the tweak and writes XTS_BLOCK_SIZE + tail bytes back. The block at
 * rctx->tail is then run through @crypt via the child cipher and XORed with
 * the tweak again. If @crypt returns non-zero (including -EINPROGRESS or
 * -EBUSY) that value is returned and the final XOR is left to
 * xts_cts_done().
 */
static int xts_cts_final(struct skcipher_request *req,
			 int (*crypt)(struct skcipher_request *req))
{
	const struct xts_tfm_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1);
	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
	struct skcipher_request *subreq = &rctx->subreq;
	int tail = req->cryptlen % XTS_BLOCK_SIZE;
	le128 b[2];
	int err;

	rctx->tail = scatterwalk_ffwd(rctx->sg, req->dst,
				      offset - XTS_BLOCK_SIZE);

	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
	b[1] = b[0];
	scatterwalk_map_and_copy(b, req->src, offset, tail, 0);

	le128_xor(b, &rctx->t, b);

	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE + tail, 1);

	skcipher_request_set_tfm(subreq, ctx->child);
	skcipher_request_set_callback(subreq, req->base.flags, xts_cts_done,
				      req);
	skcipher_request_set_crypt(subreq, rctx->tail, rctx->tail,
				   XTS_BLOCK_SIZE, NULL);

	err = crypt(subreq);
	if (err)
		return err;

	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
	le128_xor(b, &rctx->t, b);
	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 1);

	return 0;
}

/*
 * Completion callback for the child encryption subrequest. On success it
 * restricts the subrequest flags to CRYPTO_TFM_REQ_MAY_BACKLOG, runs the
 * second tweak pass and, for lengths that are not a block multiple, the
 * ciphertext-stealing step. If that step reports -EINPROGRESS or -EBUSY
 * the request is not completed here.
 */
static void xts_encrypt_done(void *data, int err)
{
	struct skcipher_request *req = data;

	if (!err) {
		struct xts_request_ctx *rctx = skcipher_request_ctx(req);

		rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
		err = xts_xor_tweak_post(req, true);

		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
			err = xts_cts_final(req, crypto_skcipher_encrypt);
			if (err == -EINPROGRESS || err == -EBUSY)
				return;
		}
	}

	skcipher_request_complete(req, err);
}

/* Decryption counterpart of xts_encrypt_done(); same structure. */
static void xts_decrypt_done(void *data, int err)
{
	struct skcipher_request *req = data;

	if (!err) {
		struct xts_request_ctx *rctx = skcipher_request_ctx(req);

		rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
		err = xts_xor_tweak_post(req, false);

		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
			err = xts_cts_final(req, crypto_skcipher_decrypt);
			if (err == -EINPROGRESS || err == -EBUSY)
				return;
		}
	}

	skcipher_request_complete(req, err);
}

/*
 * Common request setup. Rejects requests shorter than one block with
 * -EINVAL, points the subrequest at the child cipher operating in place on
 * req->dst over the block-aligned part of the length, installs @compl as
 * its completion callback, and derives the initial tweak by encrypting
 * req->iv with the tweak cipher.
 */
static int xts_init_crypt(struct skcipher_request *req,
			  crypto_completion_t compl)
{
	const struct xts_tfm_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
	struct skcipher_request *subreq = &rctx->subreq;

	if (req->cryptlen < XTS_BLOCK_SIZE)
		return -EINVAL;

	skcipher_request_set_tfm(subreq, ctx->child);
	skcipher_request_set_callback(subreq, req->base.flags, compl, req);
	skcipher_request_set_crypt(subreq, req->dst, req->dst,
				   req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL);

	/* calculate first value of T */
	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);

	return 0;
}

/*
 * Encrypt entry point: setup, pre-tweak, child encryption, post-tweak,
 * chained with ?: so the first non-zero result stops the sequence. The
 * ciphertext-stealing step runs only when every stage returned 0 and the
 * length is not a multiple of the block size.
 */
static int xts_encrypt(struct skcipher_request *req)
{
	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
	struct skcipher_request *subreq = &rctx->subreq;
	int err;

	err = xts_init_crypt(req, xts_encrypt_done) ?:
	      xts_xor_tweak_pre(req, true) ?:
	      crypto_skcipher_encrypt(subreq) ?:
	      xts_xor_tweak_post(req, true);

	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
		return err;

	return xts_cts_final(req, crypto_skcipher_encrypt);
}

/* Decrypt entry point; mirrors xts_encrypt() with the decrypt primitives. */
static int xts_decrypt(struct skcipher_request *req)
{
	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
	struct skcipher_request *subreq = &rctx->subreq;
	int err;

	err = xts_init_crypt(req, xts_decrypt_done) ?:
	      xts_xor_tweak_pre(req, false) ?:
	      crypto_skcipher_decrypt(subreq) ?:
	      xts_xor_tweak_post(req, false);

	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
		return err;

	return xts_cts_final(req, crypto_skcipher_decrypt);
}

/*
 * Transform constructor: instantiate the child skcipher and the tweak
 * cipher from the instance spawns (freeing the child again if the tweak
 * cannot be created) and size the request context to hold the child's
 * request plus struct xts_request_ctx.
 */
static int xts_init_tfm(struct crypto_skcipher *tfm)
{
	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
	struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);
	struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_skcipher *child;
	struct crypto_cipher *tweak;

	child = crypto_spawn_skcipher(&ictx->spawn);
	if (IS_ERR(child))
		return PTR_ERR(child);

	ctx->child = child;

	tweak = crypto_spawn_cipher(&ictx->tweak_spawn);
	if (IS_ERR(tweak)) {
		crypto_free_skcipher(ctx->child);
		return PTR_ERR(tweak);
	}

	ctx->tweak = tweak;

	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) +
					 sizeof(struct xts_request_ctx));

	return 0;
}

/* Transform destructor: release both ciphers created by xts_init_tfm(). */
static void xts_exit_tfm(struct crypto_skcipher *tfm)
{
	struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);

	crypto_free_skcipher(ctx->child);
	crypto_free_cipher(ctx->tweak);
}

/* Drop both spawns and free the instance allocated in xts_create(). */
static void xts_free_instance(struct skcipher_instance *inst)
{
	struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);

	crypto_drop_skcipher(&ictx->spawn);
	crypto_drop_cipher(&ictx->tweak_spawn);
	kfree(inst);
}

/*
 * Template ->create hook. Grabs the named skcipher, retrying as
 * "ecb(<name>)" when the first lookup returns -ENOENT; requires a
 * block size of XTS_BLOCK_SIZE and no IV; derives the "xts(<cipher>)"
 * algorithm name by stripping the "ecb(" ... ")" wrapper; grabs the bare
 * cipher for the tweak; fills in the algorithm properties and registers the
 * instance. Every failure path releases the instance via err_free_inst.
 */
static int xts_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct skcipher_alg_common *alg;
	char name[CRYPTO_MAX_ALG_NAME];
	struct skcipher_instance *inst;
	struct xts_instance_ctx *ctx;
	const char *cipher_name;
	u32 mask;
	int err;

	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
	if (err)
		return err;

	cipher_name = crypto_attr_alg_name(tb[1]);
	if (IS_ERR(cipher_name))
		return PTR_ERR(cipher_name);

	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	ctx = skcipher_instance_ctx(inst);

	err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
				   cipher_name, 0, mask);
	if (err == -ENOENT) {
		/* Preset the error reported if the wrapped name is truncated. */
		err = -ENAMETOOLONG;
		if (snprintf(name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
			goto err_free_inst;

		err = crypto_grab_skcipher(&ctx->spawn,
					   skcipher_crypto_instance(inst),
					   name, 0, mask);
	}

	if (err)
		goto err_free_inst;

	alg = crypto_spawn_skcipher_alg_common(&ctx->spawn);

	err = -EINVAL;
	if (alg->base.cra_blocksize != XTS_BLOCK_SIZE)
		goto err_free_inst;

	if (alg->ivsize)
		goto err_free_inst;

	err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts",
				  &alg->base);
	if (err)
		goto err_free_inst;

	err = -EINVAL;
	cipher_name = alg->base.cra_name;

	/* Alas we screwed up the naming so we have to mangle the
	 * cipher name.
	 */
	if (!strncmp(cipher_name, "ecb(", 4)) {
		int len;

		len = strscpy(name, cipher_name + 4, sizeof(name));
		if (len < 2)
			goto err_free_inst;

		if (name[len - 1] != ')')
			goto err_free_inst;

		name[len - 1] = 0;

		if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
			     "xts(%s)", name) >= CRYPTO_MAX_ALG_NAME) {
			err = -ENAMETOOLONG;
			goto err_free_inst;
		}
	} else
		goto err_free_inst;

	err = crypto_grab_cipher(&ctx->tweak_spawn,
				 skcipher_crypto_instance(inst), name, 0, mask);
	if (err)
		goto err_free_inst;

	inst->alg.base.cra_priority = alg->base.cra_priority;
	inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;
	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
				       (__alignof__(u64) - 1);

	inst->alg.ivsize = XTS_BLOCK_SIZE;
	/* The XTS key is two child-cipher keys back to back (see xts_setkey()). */
	inst->alg.min_keysize = alg->min_keysize * 2;
	inst->alg.max_keysize = alg->max_keysize * 2;

	inst->alg.base.cra_ctxsize = sizeof(struct xts_tfm_ctx);

	inst->alg.init = xts_init_tfm;
	inst->alg.exit = xts_exit_tfm;

	inst->alg.setkey = xts_setkey;
	inst->alg.encrypt = xts_encrypt;
	inst->alg.decrypt = xts_decrypt;

	inst->free = xts_free_instance;

	err = skcipher_register_instance(tmpl, inst);
	if (err) {
err_free_inst:
		xts_free_instance(inst);
	}
	return err;
}

/* Template descriptor registered at module init. */
static struct crypto_template xts_tmpl = {
	.name = "xts",
	.create = xts_create,
	.module = THIS_MODULE,
};

static int __init xts_module_init(void)
{
	return crypto_register_template(&xts_tmpl);
}

static void __exit xts_module_exit(void)
{
	crypto_unregister_template(&xts_tmpl);
}

subsys_initcall(xts_module_init);
module_exit(xts_module_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("XTS block cipher mode");
MODULE_ALIAS_CRYPTO("xts");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
MODULE_SOFTDEP("pre: ecb");
|
57 57 6 22 54 12 37 55 23 2 54 3 25 48 55 1 35 3 11 54 3 54 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 | // SPDX-License-Identifier: GPL-2.0-only /* * LZO1X Decompressor from LZO * * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer <markus@oberhumer.com>
 *
 *  The full LZO package can be found at:
 *  http://www.oberhumer.com/opensource/lzo/
 *
 *  Changed for Linux kernel use by:
 *  Nitin Gupta <nitingupta910@gmail.com>
 *  Richard Purdie <rpurdie@openedhand.com>
 */

#ifndef STATIC
#include <linux/module.h>
#include <linux/kernel.h>
#endif
#include <asm/unaligned.h>
#include <linux/lzo.h>
#include "lzodefs.h"

/*
 * Bounds-check helpers. They rely on the locals ip/ip_end, op/op_end and
 * out of lzo1x_decompress_safe() and on its error labels.
 * HAVE_IP/HAVE_OP test that at least x bytes remain in the input/output;
 * NEED_IP/NEED_OP jump to the matching overrun label when they do not;
 * TEST_LB rejects a match position that lies before the start of the
 * output buffer.
 */
#define HAVE_IP(x)      ((size_t)(ip_end - ip) >= (size_t)(x))
#define HAVE_OP(x)      ((size_t)(op_end - op) >= (size_t)(x))
#define NEED_IP(x)      if (!HAVE_IP(x)) goto input_overrun
#define NEED_OP(x)      if (!HAVE_OP(x)) goto output_overrun
#define TEST_LB(m_pos)  if ((m_pos) < out) goto lookbehind_overrun

/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base
 * count without overflowing an integer. The multiply will overflow when
 * multiplying 255 by more than MAXINT/255. The sum will overflow earlier
 * depending on the base count. Since the base count is taken from a u8
 * and a few bits, it is safe to assume that it will always be lower than
 * or equal to 2*255, thus we can always prevent any overflow by accepting
 * two less 255 steps. See Documentation/staging/lzo.rst for more information.
 */
#define MAX_255_COUNT      ((((size_t)~0) / 255) - 2)

/*
 * lzo1x_decompress_safe() - decompress an LZO1X stream with bounds checking
 * @in:      compressed input
 * @in_len:  number of bytes available at @in; fewer than 3 is rejected as
 *           an input overrun
 * @out:     output buffer
 * @out_len: on entry, the capacity of @out; on the end-of-stream and
 *           overrun exits it is set to the number of bytes written. The
 *           direct "return LZO_E_ERROR" paths leave it unchanged.
 *
 * When @in_len is at least 5 and the first byte is 17, the second byte is
 * taken as the bitstream version and both bytes are skipped; otherwise the
 * version is 0. A non-zero version enables the zero-run instruction handled
 * in the final else branch of the main loop.
 *
 * Returns LZO_E_OK when the end-of-stream marker is reached with the input
 * exactly consumed, LZO_E_INPUT_NOT_CONSUMED or LZO_E_INPUT_OVERRUN when it
 * is reached early or late, LZO_E_ERROR for a malformed marker or an
 * over-long run-length encoding, and LZO_E_INPUT_OVERRUN,
 * LZO_E_OUTPUT_OVERRUN or LZO_E_LOOKBEHIND_OVERRUN when a bounds check
 * fails.
 */
int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
			  unsigned char *out, size_t *out_len)
{
	unsigned char *op;
	const unsigned char *ip;
	size_t t, next;
	size_t state = 0;
	const unsigned char *m_pos;
	const unsigned char * const ip_end = in + in_len;
	unsigned char * const op_end = out + *out_len;

	unsigned char bitstream_version;

	op = out;
	ip = in;

	if (unlikely(in_len < 3))
		goto input_overrun;

	if (likely(in_len >= 5) && likely(*ip == 17)) {
		bitstream_version = ip[1];
		ip += 2;
	} else {
		bitstream_version = 0;
	}

	/* A first byte above 17 encodes an initial literal run of (byte - 17). */
	if (*ip > 17) {
		t = *ip++ - 17;
		if (t < 4) {
			next = t;
			goto match_next;
		}
		goto copy_literal_run;
	}

	for (;;) {
		t = *ip++;
		if (t < 16) {
			if (likely(state == 0)) {
				if (unlikely(t == 0)) {
					size_t offset;
					const unsigned char *ip_last = ip;

					/* Each zero byte extends the run length by 255. */
					while (unlikely(*ip == 0)) {
						ip++;
						NEED_IP(1);
					}
					offset = ip - ip_last;
					if (unlikely(offset > MAX_255_COUNT))
						return LZO_E_ERROR;

					/* offset * 255, computed as (offset << 8) - offset */
					offset = (offset << 8) - offset;
					t += offset + 15 + *ip++;
				}
				t += 3;
copy_literal_run:
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
				/*
				 * Fast path: copies 16 bytes per iteration and
				 * may run past t, hence the 15 bytes of slack
				 * required on both buffers.
				 */
				if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
					const unsigned char *ie = ip + t;
					unsigned char *oe = op + t;
					do {
						COPY8(op, ip);
						op += 8;
						ip += 8;
						COPY8(op, ip);
						op += 8;
						ip += 8;
					} while (ip < ie);
					ip = ie;
					op = oe;
				} else
#endif
				{
					NEED_OP(t);
					NEED_IP(t + 3);
					do {
						*op++ = *ip++;
					} while (--t > 0);
				}
				state = 4;
				continue;
			} else if (state != 4) {
				next = t & 3;
				m_pos = op - 1;
				m_pos -= t >> 2;
				m_pos -= *ip++ << 2;
				TEST_LB(m_pos);
				NEED_OP(2);
				op[0] = m_pos[0];
				op[1] = m_pos[1];
				op += 2;
				goto match_next;
			} else {
				next = t & 3;
				m_pos = op - (1 + M2_MAX_OFFSET);
				m_pos -= t >> 2;
				m_pos -= *ip++ << 2;
				t = 3;
			}
		} else if (t >= 64) {
			next = t & 3;
			m_pos = op - 1;
			m_pos -= (t >> 2) & 7;
			m_pos -= *ip++ << 3;
			t = (t >> 5) - 1 + (3 - 1);
		} else if (t >= 32) {
			t = (t & 31) + (3 - 1);
			if (unlikely(t == 2)) {
				size_t offset;
				const unsigned char *ip_last = ip;

				while (unlikely(*ip == 0)) {
					ip++;
					NEED_IP(1);
				}
				offset = ip - ip_last;
				if (unlikely(offset > MAX_255_COUNT))
					return LZO_E_ERROR;

				offset = (offset << 8) - offset;
				t += offset + 31 + *ip++;
				NEED_IP(2);
			}
			m_pos = op - 1;
			next = get_unaligned_le16(ip);
			ip += 2;
			m_pos -= next >> 2;
			next &= 3;
		} else {
			NEED_IP(2);
			next = get_unaligned_le16(ip);
			/* Zero-run instruction; honoured only for a non-zero bitstream version. */
			if (((next & 0xfffc) == 0xfffc) &&
			    ((t & 0xf8) == 0x18) &&
			    likely(bitstream_version)) {
				NEED_IP(3);
				t &= 7;
				t |= ip[2] << 3;
				t += MIN_ZERO_RUN_LENGTH;
				NEED_OP(t);
				memset(op, 0, t);
				op += t;
				next &= 3;
				ip += 3;
				goto match_next;
			} else {
				m_pos = op;
				m_pos -= (t & 8) << 11;
				t = (t & 7) + (3 - 1);
				if (unlikely(t == 2)) {
					size_t offset;
					const unsigned char *ip_last = ip;

					while (unlikely(*ip == 0)) {
						ip++;
						NEED_IP(1);
					}
					offset = ip - ip_last;
					if (unlikely(offset > MAX_255_COUNT))
						return LZO_E_ERROR;

					offset = (offset << 8) - offset;
					t += offset + 7 + *ip++;
					NEED_IP(2);
					next = get_unaligned_le16(ip);
				}
				ip += 2;
				m_pos -= next >> 2;
				next &= 3;
				/* A zero distance here is the end-of-stream marker. */
				if (m_pos == op)
					goto eof_found;
				m_pos -= 0x4000;
			}
		}
		TEST_LB(m_pos);
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
		/* 8-byte copies are only used when source and destination are at least 8 apart. */
		if (op - m_pos >= 8) {
			unsigned char *oe = op + t;
			if (likely(HAVE_OP(t + 15))) {
				do {
					COPY8(op, m_pos);
					op += 8;
					m_pos += 8;
					COPY8(op, m_pos);
					op += 8;
					m_pos += 8;
				} while (op < oe);
				op = oe;
				if (HAVE_IP(6)) {
					state = next;
					COPY4(op, ip);
					op += next;
					ip += next;
					continue;
				}
			} else {
				NEED_OP(t);
				do {
					*op++ = *m_pos++;
				} while (op < oe);
			}
		} else
#endif
		{
			unsigned char *oe = op + t;
			NEED_OP(t);
			op[0] = m_pos[0];
			op[1] = m_pos[1];
			op += 2;
			m_pos += 2;
			do {
				*op++ = *m_pos++;
			} while (op < oe);
		}
match_next:
		/* Copy the 0-3 literal bytes that trail the match just processed. */
		state = next;
		t = next;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
		if (likely(HAVE_IP(6) && HAVE_OP(4))) {
			COPY4(op, ip);
			op += t;
			ip += t;
		} else
#endif
		{
			NEED_IP(t + 3);
			NEED_OP(t);
			while (t > 0) {
				*op++ = *ip++;
				t--;
			}
		}
	}

eof_found:
	*out_len = op - out;
	/* The marker is valid only with t == 3; then classify by input position. */
	return (t != 3       ? LZO_E_ERROR :
		ip == ip_end ? LZO_E_OK :
		ip <  ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);

input_overrun:
	*out_len = op - out;
	return LZO_E_INPUT_OVERRUN;

output_overrun:
	*out_len = op - out;
	return LZO_E_OUTPUT_OVERRUN;

lookbehind_overrun:
	*out_len = op - out;
	return LZO_E_LOOKBEHIND_OVERRUN;
}
#ifndef STATIC
EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X Decompressor");

#endif
|
339 13 5 8 5 2 1 1 7 4 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../kernel/futex/futex.h" #include "io_uring.h" #include "rsrc.h" #include "futex.h" struct io_futex { struct file *file; union { u32 __user *uaddr; struct futex_waitv __user *uwaitv; }; unsigned long futex_val; unsigned long futex_mask; unsigned long futexv_owned; u32 futex_flags; unsigned int futex_nr; bool 
futexv_unqueued; }; struct io_futex_data { union { struct futex_q q; struct io_cache_entry cache; }; struct io_kiocb *req; }; void io_futex_cache_init(struct io_ring_ctx *ctx) { io_alloc_cache_init(&ctx->futex_cache, IO_NODE_ALLOC_CACHE_MAX, sizeof(struct io_futex_data)); } static void io_futex_cache_entry_free(struct io_cache_entry *entry) { kfree(container_of(entry, struct io_futex_data, cache)); } void io_futex_cache_free(struct io_ring_ctx *ctx) { io_alloc_cache_free(&ctx->futex_cache, io_futex_cache_entry_free); } static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts) { req->async_data = NULL; hlist_del_init(&req->hash_node); io_req_task_complete(req, ts); } static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts) { struct io_futex_data *ifd = req->async_data; struct io_ring_ctx *ctx = req->ctx; io_tw_lock(ctx, ts); if (!io_alloc_cache_put(&ctx->futex_cache, &ifd->cache)) kfree(ifd); __io_futex_complete(req, ts); } static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); struct futex_vector *futexv = req->async_data; io_tw_lock(req->ctx, ts); if (!iof->futexv_unqueued) { int res; res = futex_unqueue_multiple(futexv, iof->futex_nr); if (res != -1) io_req_set_res(req, res, 0); } kfree(req->async_data); req->flags &= ~REQ_F_ASYNC_DATA; __io_futex_complete(req, ts); } static bool io_futexv_claim(struct io_futex *iof) { if (test_bit(0, &iof->futexv_owned) || test_and_set_bit_lock(0, &iof->futexv_owned)) return false; return true; } static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) { /* futex wake already done or in progress */ if (req->opcode == IORING_OP_FUTEX_WAIT) { struct io_futex_data *ifd = req->async_data; if (!futex_unqueue(&ifd->q)) return false; req->io_task_work.func = io_futex_complete; } else { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); if (!io_futexv_claim(iof)) return false; 
req->io_task_work.func = io_futexv_complete; } hlist_del_init(&req->hash_node); io_req_set_res(req, -ECANCELED, 0); io_req_task_work_add(req); return true; } int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, unsigned int issue_flags) { struct hlist_node *tmp; struct io_kiocb *req; int nr = 0; if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED)) return -ENOENT; io_ring_submit_lock(ctx, issue_flags); hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { if (req->cqe.user_data != cd->data && !(cd->flags & IORING_ASYNC_CANCEL_ANY)) continue; if (__io_futex_cancel(ctx, req)) nr++; if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) break; } io_ring_submit_unlock(ctx, issue_flags); if (nr) return nr; return -ENOENT; } bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, bool cancel_all) { struct hlist_node *tmp; struct io_kiocb *req; bool found = false; lockdep_assert_held(&ctx->uring_lock); hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { if (!io_match_task_safe(req, task, cancel_all)) continue; __io_futex_cancel(ctx, req); found = true; } return found; } int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); u32 flags; if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index || sqe->file_index)) return -EINVAL; iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); iof->futex_val = READ_ONCE(sqe->addr2); iof->futex_mask = READ_ONCE(sqe->addr3); flags = READ_ONCE(sqe->fd); if (flags & ~FUTEX2_VALID_MASK) return -EINVAL; iof->futex_flags = futex2_to_flags(flags); if (!futex_flags_valid(iof->futex_flags)) return -EINVAL; if (!futex_validate_input(iof->futex_flags, iof->futex_val) || !futex_validate_input(iof->futex_flags, iof->futex_mask)) return -EINVAL; return 0; } static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q) { struct io_kiocb *req = q->wake_data; struct io_futex *iof = 
io_kiocb_to_cmd(req, struct io_futex); if (!io_futexv_claim(iof)) return; if (unlikely(!__futex_wake_mark(q))) return; io_req_set_res(req, 0, 0); req->io_task_work.func = io_futexv_complete; io_req_task_work_add(req); } int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); struct futex_vector *futexv; int ret; /* No flags or mask supported for waitv */ if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index || sqe->addr2 || sqe->futex_flags || sqe->addr3)) return -EINVAL; iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); iof->futex_nr = READ_ONCE(sqe->len); if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX) return -EINVAL; futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL); if (!futexv) return -ENOMEM; ret = futex_parse_waitv(futexv, iof->uwaitv, iof->futex_nr, io_futex_wakev_fn, req); if (ret) { kfree(futexv); return ret; } iof->futexv_owned = 0; iof->futexv_unqueued = 0; req->flags |= REQ_F_ASYNC_DATA; req->async_data = futexv; return 0; } static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q) { struct io_futex_data *ifd = container_of(q, struct io_futex_data, q); struct io_kiocb *req = ifd->req; if (unlikely(!__futex_wake_mark(q))) return; io_req_set_res(req, 0, 0); req->io_task_work.func = io_futex_complete; io_req_task_work_add(req); } static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx) { struct io_cache_entry *entry; entry = io_alloc_cache_get(&ctx->futex_cache); if (entry) return container_of(entry, struct io_futex_data, cache); return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT); } int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); struct futex_vector *futexv = req->async_data; struct io_ring_ctx *ctx = req->ctx; int ret, woken = -1; io_ring_submit_lock(ctx, issue_flags); ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken); /* * 
Error case, ret is < 0. Mark the request as failed. */ if (unlikely(ret < 0)) { io_ring_submit_unlock(ctx, issue_flags); req_set_fail(req); io_req_set_res(req, ret, 0); kfree(futexv); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; return IOU_OK; } /* * 0 return means that we successfully setup the waiters, and that * nobody triggered a wakeup while we were doing so. If the wakeup * happened post setup, the task_work will be run post this issue and * under the submission lock. 1 means We got woken while setting up, * let that side do the completion. Note that * futex_wait_multiple_setup() will have unqueued all the futexes in * this case. Mark us as having done that already, since this is * different from normal wakeup. */ if (!ret) { /* * If futex_wait_multiple_setup() returns 0 for a * successful setup, then the task state will not be * runnable. This is fine for the sync syscall, as * it'll be blocking unless we already got one of the * futexes woken, but it obviously won't work for an * async invocation. Mark us runnable again. 
*/ __set_current_state(TASK_RUNNING); hlist_add_head(&req->hash_node, &ctx->futex_list); } else { iof->futexv_unqueued = 1; if (woken != -1) io_req_set_res(req, woken, 0); } io_ring_submit_unlock(ctx, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); struct io_ring_ctx *ctx = req->ctx; struct io_futex_data *ifd = NULL; struct futex_hash_bucket *hb; int ret; if (!iof->futex_mask) { ret = -EINVAL; goto done; } io_ring_submit_lock(ctx, issue_flags); ifd = io_alloc_ifd(ctx); if (!ifd) { ret = -ENOMEM; goto done_unlock; } req->async_data = ifd; ifd->q = futex_q_init; ifd->q.bitset = iof->futex_mask; ifd->q.wake = io_futex_wake_fn; ifd->req = req; ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags, &ifd->q, &hb); if (!ret) { hlist_add_head(&req->hash_node, &ctx->futex_list); io_ring_submit_unlock(ctx, issue_flags); futex_queue(&ifd->q, hb); return IOU_ISSUE_SKIP_COMPLETE; } done_unlock: io_ring_submit_unlock(ctx, issue_flags); done: if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); kfree(ifd); return IOU_OK; } int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); int ret; /* * Strict flags - ensure that waking 0 futexes yields a 0 result. * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details. */ ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags, iof->futex_val, iof->futex_mask); if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } |
3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | // SPDX-License-Identifier: GPL-2.0-only /* * Network Service Header * * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com> */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/gso.h> #include <net/nsh.h> #include <net/tun_proto.h> int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh) { struct nshhdr *nh; size_t length = nsh_hdr_len(pushed_nh); u8 next_proto; if (skb->mac_len) { next_proto = TUN_P_ETHERNET; } else { next_proto = tun_p_from_eth_p(skb->protocol); if (!next_proto) return -EAFNOSUPPORT; } /* Add the NSH header */ if (skb_cow_head(skb, length) < 0) return -ENOMEM; skb_push(skb, length); nh = (struct nshhdr *)(skb->data); memcpy(nh, pushed_nh, length); nh->np = next_proto; skb_postpush_rcsum(skb, nh, length); skb->protocol = htons(ETH_P_NSH); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_mac_len(skb); return 0; } EXPORT_SYMBOL_GPL(nsh_push); int nsh_pop(struct sk_buff *skb) { struct nshhdr *nh; size_t length; __be16 inner_proto; if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN)) return -ENOMEM; nh = (struct nshhdr *)(skb->data); length = nsh_hdr_len(nh); if (length < NSH_BASE_HDR_LEN) return -EINVAL; inner_proto = tun_p_to_eth_p(nh->np); if (!pskb_may_pull(skb, length)) return -ENOMEM; if (!inner_proto) return -EAFNOSUPPORT; skb_pull_rcsum(skb, length); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_mac_len(skb); skb->protocol = inner_proto; return 0; } 
EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; unsigned int nsh_len, mac_len; __be16 proto; skb_reset_network_header(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) goto out; nsh_len = nsh_hdr_len(nsh_hdr(skb)); if (nsh_len < NSH_BASE_HDR_LEN) goto out; if (unlikely(!pskb_may_pull(skb, nsh_len))) goto out; proto = tun_p_to_eth_p(nsh_hdr(skb)->np); if (!proto) goto out; __skb_pull(skb, nsh_len); skb_reset_mac_header(skb); skb->mac_len = proto == htons(ETH_P_TEB) ? ETH_HLEN : 0; skb->protocol = proto; features &= NETIF_F_SG; segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len, mac_offset, mac_len); goto out; } for (skb = segs; skb; skb = skb->next) { skb->protocol = htons(ETH_P_NSH); __skb_push(skb, nsh_len); skb->mac_header = mac_offset; skb->network_header = skb->mac_header + mac_len; skb->mac_len = mac_len; } out: return segs; } static struct packet_offload nsh_packet_offload __read_mostly = { .type = htons(ETH_P_NSH), .priority = 15, .callbacks = { .gso_segment = nsh_gso_segment, }, }; static int __init nsh_init_module(void) { dev_add_offload(&nsh_packet_offload); return 0; } static void __exit nsh_cleanup_module(void) { dev_remove_offload(&nsh_packet_offload); } module_init(nsh_init_module); module_exit(nsh_cleanup_module); MODULE_AUTHOR("Jiri Benc <jbenc@redhat.com>"); MODULE_DESCRIPTION("NSH protocol"); MODULE_LICENSE("GPL v2"); |
9 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 
*/

#ifndef __INCORE_DOT_H__
#define __INCORE_DOT_H__

#include <linux/fs.h>
#include <linux/kobject.h>
#include <linux/workqueue.h>
#include <linux/dlm.h>
#include <linux/buffer_head.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/completion.h>
#include <linux/rbtree.h>
#include <linux/ktime.h>
#include <linux/percpu.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/mutex.h>

#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020

/* Forward declarations for types that are only used through pointers below */
struct gfs2_log_operations;
struct gfs2_bufdata;
struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
struct gfs2_trans;
struct gfs2_jdesc;
struct gfs2_sbd;
struct lm_lockops;

/* Callback type that receives a glock and an unsigned int 'ret' value */
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);

/*
 * Log header fields.
 * NOTE(review): the _host suffix suggests this is the in-core counterpart of
 * an on-disk log header - confirm against the on-disk format definitions.
 */
struct gfs2_log_header_host {
	u64 lh_sequence; /* Sequence number of this transaction */
	u32 lh_flags; /* GFS2_LOG_HEAD_... */
	u32 lh_tail; /* Block number of log tail */
	u32 lh_blkno;

	s64 lh_local_total;
	s64 lh_local_free;
	s64 lh_local_dinodes;
};

/*
 * Structure of operations that are associated with each
 * type of element in the log.
 *
 * The commit hooks take the superblock and a transaction; the scan hooks
 * take a journal descriptor and a 'pass' number.
 */

struct gfs2_log_operations {
	void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
	void (*lo_before_scan) (struct gfs2_jdesc *jd,
				struct gfs2_log_header_host *head, int pass);
	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
				 struct gfs2_log_descriptor *ld, __be64 *ptr,
				 int pass);
	void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
	const char *lo_name;
};

/* NOTE(review): presumably a bit number for gfs2_bitmap.bi_flags - confirm */
#define GBF_FULL 1

/**
 * Clone bitmaps (bi_clone):
 *
 * - When a block is freed, we remember the previous state of the block in the
 *   clone bitmap, and only mark the block as free in the real bitmap.
 *
 * - When looking for a block to allocate, we check for a free block in the
 *   clone bitmap, and if no clone bitmap exists, in the real bitmap.
 *
 * - For allocating a block, we mark it as allocated in the real bitmap, and if
 *   a clone bitmap exists, also in the clone bitmap.
 *
 * - At the end of a log_flush, we copy the real bitmap into the clone bitmap
 *   to make the clone bitmap reflect the current allocation state.
 *   (Alternatively, we could remove the clone bitmap.)
 *
 * The clone bitmaps are in-core only, and is never written to disk.
 *
 * These steps ensure that blocks which have been freed in a transaction cannot
 * be reallocated in that same transaction.
 */
struct gfs2_bitmap {
	struct buffer_head *bi_bh;	/* buffer backing the real bitmap - presumably; confirm */
	char *bi_clone;			/* clone bitmap, may be absent */
	unsigned long bi_flags;
	u32 bi_offset;
	u32 bi_start;
	u32 bi_bytes;
	u32 bi_blocks;
};

/* In-core state of one resource group (rgrp) */
struct gfs2_rgrpd {
	struct rb_node rd_node;		/* Link with superblock */
	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
	u64 rd_addr;			/* grp block disk address */
	u64 rd_data0;			/* first data location */
	u32 rd_length;			/* length of rgrp header in fs blocks */
	u32 rd_data;			/* num of data blocks in rgrp */
	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
	u32 rd_free;
	u32 rd_requested;		/* number of blocks in rd_rstree */
	u32 rd_reserved;		/* number of reserved blocks */
	u32 rd_free_clone;
	u32 rd_dinodes;
	u64 rd_igeneration;
	struct gfs2_bitmap *rd_bits;
	struct gfs2_sbd *rd_sbd;
	struct gfs2_rgrp_lvb *rd_rgl;
	u32 rd_last_alloc;
	u32 rd_flags;			/* GFS2_RDF_... (the values below all fall within GFS2_RDF_MASK) */
	u32 rd_extfail_pt;		/* extent failure point */
#define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
#define GFS2_RDF_ERROR		0x40000000 /* error in rg */
#define GFS2_RDF_PREFERRED	0x80000000 /* This rgrp is preferred */
#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
	spinlock_t rd_rsspin;		/* protects reservation related vars */
	struct mutex rd_mutex;
	struct rb_root rd_rstree;	/* multi-block reservation tree */
};

/* gfs2-private buffer_head state bits, numbered from BH_PrivateStart */
enum gfs2_state_bits {
	BH_Pinned = BH_PrivateStart,
	BH_Escaped = BH_PrivateStart + 1,
};

/*
 * Instantiate the buffer_head accessor macros for the two private bits.
 * NOTE(review): the generated helper names come from <linux/buffer_head.h>,
 * which is not visible here.
 */
BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)

/*
 * Per-buffer bookkeeping: ties a buffer_head (bd_bh) to a glock (bd_gl) and
 * a transaction (bd_tr), with list linkage for three lists.
 */
struct gfs2_bufdata {
	struct buffer_head *bd_bh;
	struct gfs2_glock *bd_gl;
	u64 bd_blkno;

	struct list_head bd_list;

	struct gfs2_trans *bd_tr;
	struct list_head bd_ail_st_list;
	struct list_head bd_ail_gl_list;
};

/*
 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
 * prefix of lock_dlm_ gets awkward.
 */

#define GDLM_STRNAME_BYTES 25
#define GDLM_LVB_SIZE 32

/*
 * ls_recover_flags:
 *
 * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
 * held by failed nodes whose journals need recovery.  Those locks should
 * only be used for journal recovery until the journal recovery is done.
 * This is set by the dlm recover_prep callback and cleared by the
 * gfs2_control thread when journal recovery is complete.  To avoid
 * races between recover_prep setting and gfs2_control clearing, recover_spin
 * is held while changing this bit and reading/writing recover_block
 * and recover_start.
 *
 * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
 *
 * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
 * recovery of all journals before allowing other nodes to mount the fs.
 * This is cleared when FIRST_MOUNT_DONE is set.
 *
 * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
 * recovery of all journals, and now allows other nodes to mount the fs.
 *
 * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
 * BLOCK_LOCKS for the first time.  The gfs2_control thread should now
 * control clearing BLOCK_LOCKS for further recoveries.
 *
 * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
 *
 * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
 * and recover_done(), i.e. set while recover_block == recover_start.
 */

enum {
	DFL_BLOCK_LOCKS = 0,
	DFL_NO_DLM_OPS = 1,
	DFL_FIRST_MOUNT = 2,
	DFL_FIRST_MOUNT_DONE = 3,
	DFL_MOUNT_DONE = 4,
	DFL_UNMOUNT = 5,
	DFL_DLM_RECOVERY = 6,
};

/*
 * We are using struct lm_lockname as an rhashtable key.  Avoid holes within
 * the struct; padding at the end is fine.
 */
struct lm_lockname {
	u64 ln_number;
	struct gfs2_sbd *ln_sbd;
	unsigned int ln_type;
};

/* True when all three fields of the two lock names are equal */
#define lm_name_equal(name1, name2) \
	(((name1)->ln_number == (name2)->ln_number) && \
	 ((name1)->ln_type == (name2)->ln_type) && \
	 ((name1)->ln_sbd == (name2)->ln_sbd))


/*
 * Callbacks and constant attributes attached to a glock through
 * gfs2_glock.gl_ops.  The GLOF_* values defined next to go_flags are
 * single-bit masks.
 */
struct gfs2_glock_operations {
	int (*go_sync) (struct gfs2_glock *gl);
	int (*go_xmote_bh)(struct gfs2_glock *gl);
	void (*go_inval) (struct gfs2_glock *gl, int flags);
	int (*go_demote_ok) (const struct gfs2_glock *gl);
	int (*go_instantiate) (struct gfs2_glock *gl);
	int (*go_held)(struct gfs2_holder *gh);
	void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl,
			const char *fs_id_buf);
	void (*go_callback)(struct gfs2_glock *gl, bool remote);
	void (*go_free)(struct gfs2_glock *gl);
	const int go_subclass;
	const int go_type;
	const unsigned long go_flags;
#define GLOF_ASPACE 1 /* address space attached */
#define GLOF_LVB    2 /* Lock Value Block attached */
#define GLOF_LRU    4 /* LRU managed */
#define GLOF_NONDISK   8 /* not I/O related */
};

/* Indices into gfs2_lkstats.stats[]; GFS2_NR_LKSTATS is the array size */
enum {
	GFS2_LKS_SRTT = 0,	/* Non blocking smoothed round trip time */
	GFS2_LKS_SRTTVAR = 1,	/* Non blocking smoothed variance */
	GFS2_LKS_SRTTB = 2,	/* Blocking smoothed round trip time */
	GFS2_LKS_SRTTVARB = 3,	/* Blocking smoothed variance */
	GFS2_LKS_SIRT = 4,	/* Smoothed Inter-request time */
	GFS2_LKS_SIRTVAR = 5,	/* Smoothed Inter-request variance */
	GFS2_LKS_DCOUNT = 6,	/* Count of dlm requests */
	GFS2_LKS_QCOUNT = 7,	/* Count of gfs2_holder queues */
	GFS2_NR_LKSTATS
};

struct gfs2_lkstats {
	u64 stats[GFS2_NR_LKSTATS];
};

enum {
	/* States */
	HIF_HOLDER		= 6,  /* Set for gh that "holds" the glock */
	HIF_WAIT		= 10,
};

/* One holder of (or waiter for) a glock; linked through gh_list */
struct gfs2_holder {
	struct list_head gh_list;

	struct gfs2_glock *gh_gl;
	struct pid *gh_owner_pid;
	u16 gh_flags;
	u16 gh_state;

	int gh_error;
	unsigned long gh_iflags; /* HIF_... */
	unsigned long gh_ip;
};

/* Number of quota types we support */
#define GFS2_MAXQUOTAS 2

struct gfs2_qadata { /* quota allocation data */
	/* Quota stuff */
	struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS];
	struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS];
	unsigned int qa_qd_num;
	int qa_ref;
};

/* Resource group multi-block reservation, in order of appearance:

   Step 1. Function prepares to write, allocates a mb, sets the size hint.
   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
   Step 4. Bits are assigned from the rgrp based on either the reservation
           or wherever it can.
*/

struct gfs2_blkreserv {
	struct rb_node rs_node;       /* node within rd_rstree */
	struct gfs2_rgrpd *rs_rgd;
	u64 rs_start;
	u32 rs_requested;
	u32 rs_reserved;              /* number of reserved blocks */
};

/*
 * Allocation parameters
 * @target: The number of blocks we'd ideally like to allocate
 * @aflags: The flags (e.g. Orlov flag)
 *
 * The intent is to gradually expand this structure over time in
 * order to give more information, e.g. alignment, min extent size
 * to the allocation code.
 */
struct gfs2_alloc_parms {
	u64 target;
	u32 min_target;
	u32 aflags;
	u64 allowed;
};

/* Values for gfs2_glock.gl_flags (see the GLF_... note on that field) */
enum {
	GLF_LOCK			= 1,
	GLF_INSTANTIATE_NEEDED		= 2, /* needs instantiate */
	GLF_DEMOTE			= 3,
	GLF_PENDING_DEMOTE		= 4,
	GLF_DEMOTE_IN_PROGRESS		= 5,
	GLF_DIRTY			= 6,
	GLF_LFLUSH			= 7,
	GLF_INVALIDATE_IN_PROGRESS	= 8,
	GLF_REPLY_PENDING		= 9,
	GLF_INITIAL			= 10,
	GLF_FROZEN			= 11,
	GLF_INSTANTIATE_IN_PROG		= 12, /* instantiate happening now */
	GLF_LRU				= 13,
	GLF_OBJECT			= 14, /* Used only for tracing */
	GLF_BLOCKING			= 15,
	GLF_FREEING			= 16, /* Wait for glock to be freed */
	GLF_TRY_TO_EVICT		= 17, /* iopen glocks only */
	GLF_VERIFY_EVICT		= 18, /* iopen glocks only */
};

/* In-core glock; gl_name is the lm_lockname described above */
struct gfs2_glock {
	unsigned long gl_flags;		/* GLF_... */
	struct lm_lockname gl_name;

	struct lockref gl_lockref;

	/* State fields protected by gl_lockref.lock */
	unsigned int gl_state:2,	/* Current state */
		     gl_target:2,	/* Target state */
		     gl_demote_state:2,	/* State requested by remote node */
		     gl_req:2,		/* State in last dlm request */
		     gl_reply:8;	/* Last reply from the dlm */

	unsigned long gl_demote_time; /* time of first demote request */
	long gl_hold_time;
	struct list_head gl_holders;

	const struct gfs2_glock_operations *gl_ops;
	ktime_t gl_dstamp;
	struct gfs2_lkstats gl_stats;
	struct dlm_lksb gl_lksb;
	unsigned long gl_tchange;
	void *gl_object;

	struct list_head gl_lru;
	struct list_head gl_ail_list;
	atomic_t gl_ail_count;
	atomic_t gl_revokes;
	struct delayed_work gl_work;
	/* For iopen glocks only */
	struct {
		struct delayed_work gl_delete;
		u64 gl_no_formal_ino;
	};
	struct rcu_head gl_rcu;
	struct rhash_head gl_node;
};

/* Values for gfs2_inode.i_flags (see the GIF_... note on that field) */
enum {
	GIF_QD_LOCKED		= 1,
	GIF_ALLOC_FAILED	= 2,
	GIF_SW_PAGED		= 3,
	GIF_FREE_VFS_INODE      = 5,
	GIF_GLOP_PENDING	= 6,
	GIF_DEFERRED_DELETE	= 7,
};

/* In-core gfs2 inode; embeds the VFS inode as its first member */
struct gfs2_inode {
	struct inode i_inode;
	u64 i_no_addr;
	u64 i_no_formal_ino;
	u64 i_generation;
	u64 i_eattr;
	unsigned long i_flags;		/* GIF_... */
	struct gfs2_glock *i_gl;
	struct gfs2_holder i_iopen_gh;
	struct gfs2_qadata *i_qadata; /* quota allocation data */
	struct gfs2_holder i_rgd_gh;
	struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */
	u64 i_goal;	/* goal block for allocations */
	atomic_t i_sizehint;  /* hint of the write size */
	struct rw_semaphore i_rw_mutex;
	struct list_head i_ordered;
	__be64 *i_hash_cache;
	u32 i_entries;
	u32 i_diskflags;
	u8 i_height;
	u8 i_depth;
	u16 i_rahead;
};

/*
 * Since i_inode is the first element of struct gfs2_inode,
 * this is effectively a cast.
 */
static inline struct gfs2_inode *GFS2_I(struct inode *inode)
{
	return container_of(inode, struct gfs2_inode, i_inode);
}

/* Return the s_fs_info pointer of the inode's superblock as a gfs2_sbd */
static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode)
{
	return inode->i_sb->s_fs_info;
}

/*
 * A mutex paired with a glock holder.
 * NOTE(review): the f_fl_ prefix suggests per-open-file flock state -
 * confirm against the users of this struct.
 */
struct gfs2_file {
	struct mutex f_fl_mutex;
	struct gfs2_holder f_fl_gh;
};

struct gfs2_revoke_replay {
	struct list_head rr_list;
	u64 rr_blkno;
	unsigned int rr_where;
};

/* Values for gfs2_quota_data.qd_flags (see the QDF_... note on that field) */
enum {
	QDF_CHANGE		= 1,
	QDF_LOCKED		= 2,
	QDF_REFRESH		= 3,
	QDF_QMSG_QUIET          = 4,
};

struct gfs2_quota_data {
	struct hlist_bl_node qd_hlist;
	struct list_head qd_list;
	struct kqid qd_id;
	struct gfs2_sbd *qd_sbd;
	struct lockref qd_lockref;
	struct list_head qd_lru;
	unsigned qd_hash;

	unsigned long qd_flags;		/* QDF_... */

	s64 qd_change;
	s64 qd_change_sync;

	unsigned int qd_slot;
	unsigned int qd_slot_ref;

	struct buffer_head *qd_bh;
	struct gfs2_quota_change *qd_bh_qc;
	unsigned int qd_bh_count;

	struct gfs2_glock *qd_gl;
	struct gfs2_quota_lvb qd_qb;

	u64 qd_sync_gen;
	unsigned long qd_last_warn;
	struct rcu_head qd_rcu;
};

/* NOTE(review): presumably values for gfs2_trans.tr_flags - confirm */
enum {
	TR_TOUCHED = 1,
	TR_ATTACHED = 2,
	TR_ONSTACK = 3,
};

struct gfs2_trans {
	unsigned long tr_ip;

	unsigned int tr_blocks;
	unsigned int tr_revokes;
	unsigned int tr_reserved;
	unsigned long tr_flags;

	unsigned int tr_num_buf_new;
	unsigned int tr_num_databuf_new;
	unsigned int tr_num_buf_rm;
	unsigned int tr_num_databuf_rm;
	unsigned int tr_num_revoke;

	struct list_head tr_list;
	struct list_head tr_databuf;
	struct list_head tr_buf;

	unsigned int tr_first;
	struct list_head tr_ail1_list;
	struct list_head tr_ail2_list;
};

struct gfs2_journal_extent {
	struct list_head list;

	unsigned int lblock; /* First logical block */
	u64 dblock; /* First disk block */
	u64 blocks;
};

struct gfs2_jdesc {
	struct list_head jd_list;
	struct list_head extent_list;
	unsigned int nr_extents;
	struct work_struct jd_work;
	struct inode *jd_inode;
	struct bio *jd_log_bio;
	unsigned long jd_flags;
#define JDF_RECOVERY 1
	unsigned int jd_jid;
	u32 jd_blocks;
	int jd_recover_error;
	/* Replay stuff */

	unsigned int jd_found_blocks;
	unsigned int jd_found_revokes;
	unsigned int jd_replayed_blocks;

	struct list_head jd_revoke_list;
	unsigned int jd_replay_tail;

	u64 jd_no_addr;
};

struct gfs2_statfs_change_host {
	s64 sc_total;
	s64 sc_free;
	s64 sc_dinodes;
};

/* Quota modes; the four values fit the 2-bit gfs2_args.ar_quota field */
#define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
#define GFS2_QUOTA_OFF		0
#define GFS2_QUOTA_ACCOUNT	1
#define GFS2_QUOTA_ON		2
#define GFS2_QUOTA_QUIET	3 /* on but not complaining */

#define GFS2_DATA_DEFAULT	GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK	1
#define GFS2_DATA_ORDERED	2

#define GFS2_ERRORS_DEFAULT     GFS2_ERRORS_WITHDRAW
#define GFS2_ERRORS_WITHDRAW    0
#define GFS2_ERRORS_CONTINUE    1 /* place holder for future feature */
#define GFS2_ERRORS_RO          2 /* place holder for future feature */
#define GFS2_ERRORS_PANIC       3

struct gfs2_args {
	char ar_lockproto[GFS2_LOCKNAME_LEN];	/* Name of the Lock Protocol */
	char ar_locktable[GFS2_LOCKNAME_LEN];	/* Name of the Lock Table */
	char ar_hostdata[GFS2_LOCKNAME_LEN];	/* Host specific data */
	unsigned int ar_spectator:1;		/* Don't get a journal */
	unsigned int ar_localflocks:1;		/* Let the VFS do flock|fcntl */
	unsigned int ar_debug:1;		/* Oops on errors */
	unsigned int ar_posix_acl:1;		/* Enable posix acls */
	unsigned int ar_quota:2;		/* off/account/on */
	unsigned int ar_suiddir:1;		/* suiddir support */
	unsigned int ar_data:2;			/* ordered/writeback */
	unsigned int ar_meta:1;			/* mount metafs */
	unsigned int ar_discard:1;		/* discard requests */
	unsigned int ar_errors:2;               /* errors=withdraw | panic */
	unsigned int ar_nobarrier:1;            /* do not send barriers */
	unsigned int ar_rgrplvb:1;		/* use lvbs for rgrp info */
	unsigned int ar_got_rgrplvb:1;		/* Was the rgrplvb opt given? */
	unsigned int ar_loccookie:1;		/* use location based readdir
						   cookies */
	s32 ar_commit;				/* Commit interval */
	s32 ar_statfs_quantum;			/* The fast statfs interval */
	s32 ar_quota_quantum;			/* The quota interval */
	s32 ar_statfs_percent;			/* The % change to force sync */
};

struct gfs2_tune {
	spinlock_t gt_spin;

	unsigned int gt_logd_secs;

	unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
	unsigned int gt_quota_scale_num; /* Numerator */
	unsigned int gt_quota_scale_den; /* Denominator */
	unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
	unsigned int gt_new_files_jdata;
	unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
	unsigned int gt_complain_secs;
	unsigned int gt_statfs_quantum;
	unsigned int gt_statfs_slow;
};

/* NOTE(review): presumably values for a flags word in struct gfs2_sbd (defined outside this excerpt) - confirm */
enum {
	SDF_JOURNAL_CHECKED	= 0,
	SDF_JOURNAL_LIVE	= 1,
	SDF_WITHDRAWN		= 2,
	SDF_NOBARRIERS		= 3,
	SDF_NORECOVERY		= 4,
	SDF_DEMOTE		= 5,
	SDF_NOJOURNALID		= 6,
	SDF_RORECOVERY		= 7, /* read only recovery */
	SDF_SKIP_DLM_UNLOCK	= 8,
	SDF_FORCE_AIL_FLUSH     = 9,
	SDF_FREEZE_INITIATOR	= 10,
	SDF_WITHDRAWING		= 11, /* Will withdraw eventually */
	SDF_WITHDRAW_IN_PROG	= 12, /* Withdraw is in progress */
	SDF_REMOTE_WITHDRAW	= 13, /* Performing remote recovery */
	SDF_WITHDRAW_RECOVERY	= 14, /* Wait for journal recovery when we are
					 withdrawing */
	SDF_KILL		= 15,
	SDF_EVICTING		= 16,
	SDF_FROZEN		= 17,
};

#define GFS2_FSNAME_LEN		256

/* Pair of inode identifiers: formal inode number and block address */
struct gfs2_inum_host {
	u64 no_formal_ino;
	u64 no_addr;
};

struct gfs2_sb_host {
	u32 sb_magic;
	u32 sb_type;

	u32 sb_fs_format;
	u32 sb_multihost_format;
	u32 sb_bsize;
	u32 sb_bsize_shift;

	struct gfs2_inum_host sb_master_dir;
	struct gfs2_inum_host sb_root_dir;

	char sb_lockproto[GFS2_LOCKNAME_LEN];
	char sb_locktable[GFS2_LOCKNAME_LEN];
};

/*
 * lm_mount() return values
 *
 * ls_jid - the journal ID this node should use
 * ls_first - this node is the first to mount the file system
 * ls_lockspace - lock module's context for this file system
 * ls_ops - lock module's functions
 */

struct lm_lockstruct {
	int ls_jid;
	unsigned
int ls_first; const struct lm_lockops *ls_ops; dlm_lockspace_t *ls_dlm; int ls_recover_jid_done; /* These two are deprecated, */ int ls_recover_jid_status; /* used previously by gfs_controld */ struct dlm_lksb ls_mounted_lksb; /* mounted_lock */ struct dlm_lksb ls_control_lksb; /* control_lock */ char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ char *ls_lvb_bits; spinlock_t ls_recover_spin; /* protects following fields */ unsigned long ls_recover_flags; /* DFL_ */ uint32_t ls_recover_mount; /* gen in first recover_done cb */ uint32_t ls_recover_start; /* gen in last recover_done cb */ uint32_t ls_recover_block; /* copy recover_start in last recover_prep */ uint32_t ls_recover_size; /* size of recover_submit, recover_result */ uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */ uint32_t *ls_recover_result; /* result of last jid recovery */ }; struct gfs2_pcpu_lkstats { /* One struct for each glock type */ struct gfs2_lkstats lkstats[10]; }; /* List of local (per node) statfs inodes */ struct local_statfs_inode { struct list_head si_list; struct inode *si_sc_inode; unsigned int si_jid; /* journal id this statfs inode corresponds to */ }; struct gfs2_sbd { struct super_block *sd_vfs; struct gfs2_pcpu_lkstats __percpu *sd_lkstats; struct kobject sd_kobj; struct completion sd_kobj_unregister; unsigned long sd_flags; /* SDF_... 
*/ struct gfs2_sb_host sd_sb; /* Constants computed on mount */ u32 sd_fsb2bb; u32 sd_fsb2bb_shift; u32 sd_diptrs; /* Number of pointers in a dinode */ u32 sd_inptrs; /* Number of pointers in a indirect block */ u32 sd_ldptrs; /* Number of pointers in a log descriptor block */ u32 sd_jbsize; /* Size of a journaled data block */ u32 sd_hash_bsize; /* sizeof(exhash block) */ u32 sd_hash_bsize_shift; u32 sd_hash_ptrs; /* Number of pointers in a hash block */ u32 sd_qc_per_block; u32 sd_blocks_per_bitmap; u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ u32 sd_max_height; /* Max height of a file's metadata tree */ u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */ struct gfs2_args sd_args; /* Mount arguments */ struct gfs2_tune sd_tune; /* Filesystem tuning structure */ /* Lock Stuff */ struct lm_lockstruct sd_lockstruct; struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_freeze_gl; struct work_struct sd_freeze_work; wait_queue_head_t sd_kill_wait; wait_queue_head_t sd_async_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; struct completion sd_wdack; struct delayed_work sd_control_work; /* Inode Stuff */ struct dentry *sd_master_dir; struct dentry *sd_root_dir; struct inode *sd_jindex; struct inode *sd_statfs_inode; struct inode *sd_sc_inode; struct list_head sd_sc_inodes_list; struct inode *sd_qc_inode; struct inode *sd_rindex; struct inode *sd_quota_inode; /* StatFS stuff */ spinlock_t sd_statfs_spin; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; int sd_statfs_force_sync; /* Resource group stuff */ int sd_rindex_uptodate; spinlock_t sd_rindex_spin; struct rb_root sd_rindex_tree; unsigned int sd_rgrps; unsigned int sd_max_rg_data; /* Journal index stuff */ struct list_head sd_jindex_list; spinlock_t sd_jindex_spin; struct mutex sd_jindex_mutex; unsigned int sd_journals; 
struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; struct gfs2_holder sd_jinode_gh; struct gfs2_glock *sd_jinode_gl; struct gfs2_holder sd_sc_gh; struct buffer_head *sd_sc_bh; struct gfs2_holder sd_qc_gh; struct completion sd_journal_ready; /* Workqueue stuff */ struct workqueue_struct *sd_delete_wq; /* Daemon stuff */ struct task_struct *sd_logd_process; struct task_struct *sd_quotad_process; /* Quota stuff */ struct list_head sd_quota_list; atomic_t sd_quota_count; struct mutex sd_quota_mutex; struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; unsigned int sd_quota_slots; unsigned long *sd_quota_bitmap; spinlock_t sd_bitmap_lock; u64 sd_quota_sync_gen; /* Log stuff */ struct address_space sd_aspace; spinlock_t sd_log_lock; struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; atomic_t sd_log_pinned; unsigned int sd_log_num_revoke; struct list_head sd_log_revokes; struct list_head sd_log_ordered; spinlock_t sd_ordered_lock; atomic_t sd_log_thresh1; atomic_t sd_log_thresh2; atomic_t sd_log_blks_free; atomic_t sd_log_blks_needed; atomic_t sd_log_revokes_available; wait_queue_head_t sd_log_waitq; wait_queue_head_t sd_logd_waitq; u64 sd_log_sequence; int sd_log_idle; struct rw_semaphore sd_log_flush_lock; atomic_t sd_log_in_flight; wait_queue_head_t sd_log_flush_wait; int sd_log_error; /* First log error */ wait_queue_head_t sd_withdraw_wait; unsigned int sd_log_tail; unsigned int sd_log_flush_tail; unsigned int sd_log_head; unsigned int sd_log_flush_head; spinlock_t sd_ail_lock; struct list_head sd_ail1_list; struct list_head sd_ail2_list; /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; struct mutex sd_freeze_mutex; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; char sd_proto_name[GFS2_FSNAME_LEN]; /* Debugging crud */ unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ unsigned long sd_glock_dqs_held; }; static inline void 
gfs2_glstats_inc(struct gfs2_glock *gl, int which) { gl->gl_stats.stats[which]++; } static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) { const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; preempt_disable(); this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++; preempt_enable(); } struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip) { return GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); } #endif /* __INCORE_DOT_H__ */ |
59 69 54 7 2 6 22 63 1 8 8 2 2 9 3 97 97 96 17 15 2 17 5 16 5 16 8 4 2 4 2 7 9 16 40 16 27 14 13 1 9 1 2 3 1 4 2 2 11 1 3 2 5 13 1 2 2 29 1 1 5 5 1 1 1 1 78 34 4 2 1 2 1 2 7 6 244 2 2 2 1 2 2 2 1 4 283 13 7 20 20 3 4 2 28 4 24 3 1 5 5 19 12 1 10 4 1 1 1 3 3 1 2 1 1 1 4 14 1 13 1 2 1 17 9 1 1 3 17 17 1 17 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 
950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 | // SPDX-License-Identifier: GPL-2.0-only /* * net/dccp/proto.c * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include 
<linux/netdevice.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/random.h> #include <linux/slab.h> #include <net/checksum.h> #include <net/inet_sock.h> #include <net/inet_common.h> #include <net/sock.h> #include <net/xfrm.h> #include <asm/ioctls.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/delay.h> #include <linux/poll.h> #include "ccid.h" #include "dccp.h" #include "feat.h" #define CREATE_TRACE_POINTS #include "trace.h" DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); DEFINE_PER_CPU(unsigned int, dccp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 0 is no limit */ int sysctl_dccp_tx_qlen __read_mostly = 5; #ifdef CONFIG_IP_DCCP_DEBUG static const char *dccp_state_name(const int state) { static const char *const dccp_state_names[] = { [DCCP_OPEN] = "OPEN", [DCCP_REQUESTING] = "REQUESTING", [DCCP_PARTOPEN] = "PARTOPEN", [DCCP_LISTEN] = "LISTEN", [DCCP_RESPOND] = "RESPOND", [DCCP_CLOSING] = "CLOSING", [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", [DCCP_TIME_WAIT] = "TIME_WAIT", [DCCP_CLOSED] = "CLOSED", }; if (state >= DCCP_MAX_STATES) return "INVALID STATE!"; else return dccp_state_names[state]; } #endif void dccp_set_state(struct sock *sk, const int state) { const int oldstate = sk->sk_state; dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk, dccp_state_name(oldstate), dccp_state_name(state)); WARN_ON(state == oldstate); switch (state) { case DCCP_OPEN: if (oldstate != DCCP_OPEN) DCCP_INC_STATS(DCCP_MIB_CURRESTAB); /* Client retransmits all Confirm options until entering OPEN */ if (oldstate == DCCP_PARTOPEN) dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); break; case DCCP_CLOSED: if (oldstate == DCCP_OPEN || oldstate == 
DCCP_ACTIVE_CLOSEREQ || oldstate == DCCP_CLOSING) DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); if (inet_csk(sk)->icsk_bind_hash != NULL && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); fallthrough; default: if (oldstate == DCCP_OPEN) DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); } /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ inet_sk_set_state(sk, state); } EXPORT_SYMBOL_GPL(dccp_set_state); static void dccp_finish_passive_close(struct sock *sk) { switch (sk->sk_state) { case DCCP_PASSIVE_CLOSE: /* Node (client or server) has received Close packet. */ dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_set_state(sk, DCCP_CLOSED); break; case DCCP_PASSIVE_CLOSEREQ: /* * Client received CloseReq. We set the `active' flag so that * dccp_send_close() retransmits the Close as per RFC 4340, 8.3. */ dccp_send_close(sk, 1); dccp_set_state(sk, DCCP_CLOSING); } } void dccp_done(struct sock *sk) { dccp_set_state(sk, DCCP_CLOSED); dccp_clear_xmit_timers(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); else inet_csk_destroy_sock(sk); } EXPORT_SYMBOL_GPL(dccp_done); const char *dccp_packet_name(const int type) { static const char *const dccp_packet_names[] = { [DCCP_PKT_REQUEST] = "REQUEST", [DCCP_PKT_RESPONSE] = "RESPONSE", [DCCP_PKT_DATA] = "DATA", [DCCP_PKT_ACK] = "ACK", [DCCP_PKT_DATAACK] = "DATAACK", [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", [DCCP_PKT_CLOSE] = "CLOSE", [DCCP_PKT_RESET] = "RESET", [DCCP_PKT_SYNC] = "SYNC", [DCCP_PKT_SYNCACK] = "SYNCACK", }; if (type >= DCCP_NR_PKT_TYPES) return "INVALID"; else return dccp_packet_names[type]; } EXPORT_SYMBOL_GPL(dccp_packet_name); void dccp_destruct_common(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = NULL; } EXPORT_SYMBOL_GPL(dccp_destruct_common); static void dccp_sk_destruct(struct sock *sk) { dccp_destruct_common(sk); inet_sock_destruct(sk); } 
/*
 * Initialise the DCCP-specific part of a socket.
 *
 * Sets the initial RTO, request retry count, state (DCCP_CLOSED), callbacks,
 * MSS cache, role, service code and TX queue length, then arms the transmit
 * timers and the (empty) feature-negotiation list.
 *
 * Returns the result of dccp_feat_init() when @ctl_sock_initialized is
 * non-zero, otherwise 0.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
		     "please contact the netdev mailing list\n");

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

/*
 * Release everything the DCCP layer still holds for @sk: queued and pending
 * TX skbs, the bind bucket, the service list, the RX ack vector, the RX CCID
 * and any outstanding feature-negotiation entries.  Each freed pointer is
 * reset to NULL afterwards.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

/*
 * True for every state other than CLOSED, LISTEN and REQUESTING; used by
 * dccp_disconnect() to decide whether a Reset packet is sent.
 */
static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

/*
 * Abort the connection on @sk and return the socket to a reusable,
 * unconnected condition.  @flags is not used.  Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	/* Stop timers and drop the RX CCID and all queued packets. */
	dccp_clear_xmit_timers(sk);
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Forget the peer and reset the per-connection socket fields. */
	inet->inet_dport = 0;

	inet_bhash2_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A socket that still owns a local port must still have a bind bucket. */
	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk_error_report(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
* * Note that we don't need to lock the socket, as the upper poll layers * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ __poll_t dccp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask; u8 shutdown; int state; sock_poll_wait(file, sock, wait); state = inet_sk_state_load(sk); if (state == DCCP_LISTEN) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events by poll logic and correct handling of state changes made by another threads is impossible in any case. */ mask = 0; if (READ_ONCE(sk->sk_err)) mask = EPOLLERR; shutdown = READ_ONCE(sk->sk_shutdown); if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED) mask |= EPOLLHUP; if (shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected? */ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { if (atomic_read(&sk->sk_rmem_alloc) > 0) mask |= EPOLLIN | EPOLLRDNORM; if (!(shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after * wspace test but before the flags are set, * IO signal will be lost. */ if (sk_stream_is_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM; } } } return mask; } EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, int *karg) { int rc = -ENOTCONN; lock_sock(sk); if (sk->sk_state == DCCP_LISTEN) goto out; switch (cmd) { case SIOCOUTQ: { *karg = sk_wmem_alloc_get(sk); /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and * always 0, comparably to UDP. */ rc = 0; } break; case SIOCINQ: { struct sk_buff *skb; *karg = 0; skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { /* * We will only return the amount of this packet since * that is all that will be read. 
*/ *karg = skb->len; } rc = 0; } break; default: rc = -ENOIOCTLCMD; break; } out: release_sock(sk); return rc; } EXPORT_SYMBOL_GPL(dccp_ioctl); static int dccp_setsockopt_service(struct sock *sk, const __be32 service, sockptr_t optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; if (service == DCCP_SERVICE_INVALID_VALUE || optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) return -EINVAL; if (optlen > sizeof(service)) { sl = kmalloc(optlen, GFP_KERNEL); if (sl == NULL) return -ENOMEM; sl->dccpsl_nr = optlen / sizeof(u32) - 1; if (copy_from_sockptr_offset(sl->dccpsl_list, optval, sizeof(service), optlen - sizeof(service)) || dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { kfree(sl); return -EFAULT; } } lock_sock(sk); dp->dccps_service = service; kfree(dp->dccps_service_list); dp->dccps_service_list = sl; release_sock(sk); return 0; } static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) { u8 *list, len; int i, rc; if (cscov < 0 || cscov > 15) return -EINVAL; /* * Populate a list of permissible values, in the range cscov...15. This * is necessary since feature negotiation of single values only works if * both sides incidentally choose the same value. Since the list starts * lowest-value first, negotiation will pick the smallest shared value. 
*/ if (cscov == 0) return 0; len = 16 - cscov; list = kmalloc(len, GFP_KERNEL); if (list == NULL) return -ENOBUFS; for (i = 0; i < len; i++) list[i] = cscov++; rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); if (rc == 0) { if (rx) dccp_sk(sk)->dccps_pcrlen = cscov; else dccp_sk(sk)->dccps_pcslen = cscov; } kfree(list); return rc; } static int dccp_setsockopt_ccid(struct sock *sk, int type, sockptr_t optval, unsigned int optlen) { u8 *val; int rc = 0; if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; val = memdup_sockptr(optval, optlen); if (IS_ERR(val)) return PTR_ERR(val); lock_sock(sk); if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); release_sock(sk); kfree(val); return rc; } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); return 0; case DCCP_SOCKOPT_CHANGE_L: case DCCP_SOCKOPT_CHANGE_R: DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); return 0; case DCCP_SOCKOPT_CCID: case DCCP_SOCKOPT_RX_CCID: case DCCP_SOCKOPT_TX_CCID: return dccp_setsockopt_ccid(sk, optname, optval, optlen); } if (optlen < (int)sizeof(int)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; if (optname == DCCP_SOCKOPT_SERVICE) return dccp_setsockopt_service(sk, val, optval, optlen); lock_sock(sk); switch (optname) { case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) err = -EOPNOTSUPP; else dp->dccps_server_timewait = (val != 0); break; case DCCP_SOCKOPT_SEND_CSCOV: err = dccp_setsockopt_cscov(sk, val, false); break; case DCCP_SOCKOPT_RECV_CSCOV: err = 
dccp_setsockopt_cscov(sk, val, true); break; case DCCP_SOCKOPT_QPOLICY_ID: if (sk->sk_state != DCCP_CLOSED) err = -EISCONN; else if (val < 0 || val >= DCCPQ_POLICY_MAX) err = -EINVAL; else dp->dccps_qpolicy = val; break; case DCCP_SOCKOPT_QPOLICY_TXQLEN: if (val < 0) err = -EINVAL; else dp->dccps_tx_qlen = val; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { if (level != SOL_DCCP) return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen); return do_dccp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL_GPL(dccp_setsockopt); static int dccp_getsockopt_service(struct sock *sk, int len, __be32 __user *optval, int __user *optlen) { const struct dccp_sock *dp = dccp_sk(sk); const struct dccp_service_list *sl; int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; } err = -EINVAL; if (total_len > len) goto out; err = 0; if (put_user(total_len, optlen) || put_user(dp->dccps_service, optval) || (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) err = -EFAULT; out: release_sock(sk); return err; } static int do_dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct dccp_sock *dp; int val, len; if (get_user(len, optlen)) return -EFAULT; if (len < (int)sizeof(int)) return -EINVAL; dp = dccp_sk(sk); switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); return 0; case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); case DCCP_SOCKOPT_GET_CUR_MPS: val = READ_ONCE(dp->dccps_mss_cache); break; case DCCP_SOCKOPT_AVAILABLE_CCIDS: return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); case DCCP_SOCKOPT_TX_CCID: val = 
ccid_get_current_tx_ccid(dp); if (val < 0) return -ENOPROTOOPT; break; case DCCP_SOCKOPT_RX_CCID: val = ccid_get_current_rx_ccid(dp); if (val < 0) return -ENOPROTOOPT; break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; case DCCP_SOCKOPT_SEND_CSCOV: val = dp->dccps_pcslen; break; case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; case DCCP_SOCKOPT_QPOLICY_ID: val = dp->dccps_qpolicy; break; case DCCP_SOCKOPT_QPOLICY_TXQLEN: val = dp->dccps_tx_qlen; break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); case 192 ... 255: return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } len = sizeof(val); if (put_user(len, optlen) || copy_to_user(optval, &val, len)) return -EFAULT; return 0; } int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level != SOL_DCCP) return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, optname, optval, optlen); return do_dccp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL_GPL(dccp_getsockopt); static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) { struct cmsghdr *cmsg; /* * Assign an (opaque) qpolicy priority value to skb->priority. * * We are overloading this skb field for use with the qpolicy subystem. * The skb->priority is normally used for the SO_PRIORITY option, which * is initialised from sk_priority. Since the assignment of sk_priority * to skb->priority happens later (on layer 3), we overload this field * for use with queueing priorities as long as the skb is on layer 4. * The default priority value (if nothing is set) is 0. 
*/ skb->priority = 0; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_DCCP) continue; if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) return -EINVAL; switch (cmsg->cmsg_type) { case DCCP_SCM_PRIORITY: if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) return -EINVAL; skb->priority = *(__u32 *)CMSG_DATA(cmsg); break; default: return -EINVAL; } } return 0; } int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { const struct dccp_sock *dp = dccp_sk(sk); const int flags = msg->msg_flags; const int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb; int rc, size; long timeo; trace_dccp_probe(sk, len); if (len > READ_ONCE(dp->dccps_mss_cache)) return -EMSGSIZE; lock_sock(sk); timeo = sock_sndtimeo(sk, noblock); /* * We have to use sk_stream_wait_connect here to set sk_write_pending, * so that the trick in dccp_rcv_request_sent_state_process. */ /* Wait for a connection to finish. */ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_release; size = sk->sk_prot->max_header + len; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (skb == NULL) goto out_release; if (dccp_qpolicy_full(sk)) { rc = -EAGAIN; goto out_discard; } if (sk->sk_state == DCCP_CLOSED) { rc = -ENOTCONN; goto out_discard; } /* We need to check dccps_mss_cache after socket is locked. */ if (len > dp->dccps_mss_cache) { rc = -EMSGSIZE; goto out_discard; } skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc != 0) goto out_discard; rc = dccp_msghdr_parse(msg, skb); if (rc != 0) goto out_discard; dccp_qpolicy_push(sk, skb); /* * The xmit_timer is set if the TX CCID is rate-based and will expire * when congestion control permits to release further packets into the * network. Window-based CCIDs do not use this timer. 
*/ if (!timer_pending(&dp->dccps_xmit_timer)) dccp_write_xmit(sk); out_release: release_sock(sk); return rc ? : len; out_discard: kfree_skb(skb); goto out_release; } EXPORT_SYMBOL_GPL(dccp_sendmsg); int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { const struct dccp_hdr *dh; long timeo; lock_sock(sk); if (sk->sk_state == DCCP_LISTEN) { len = -ENOTCONN; goto out; } timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); if (skb == NULL) goto verify_sock_status; dh = dccp_hdr(skb); switch (dh->dccph_type) { case DCCP_PKT_DATA: case DCCP_PKT_DATAACK: goto found_ok_skb; case DCCP_PKT_CLOSE: case DCCP_PKT_CLOSEREQ: if (!(flags & MSG_PEEK)) dccp_finish_passive_close(sk); fallthrough; case DCCP_PKT_RESET: dccp_pr_debug("found fin (%s) ok!\n", dccp_packet_name(dh->dccph_type)); len = 0; goto found_fin_ok; default: dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); sk_eat_skb(sk, skb); } verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { len = 0; break; } if (sk->sk_err) { len = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) { len = 0; break; } if (sk->sk_state == DCCP_CLOSED) { if (!sock_flag(sk, SOCK_DONE)) { /* This occurs when user tries to read * from never connected socket. */ len = -ENOTCONN; break; } len = 0; break; } if (!timeo) { len = -EAGAIN; break; } if (signal_pending(current)) { len = sock_intr_errno(timeo); break; } sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) len = skb->len; else if (len < skb->len) msg->msg_flags |= MSG_TRUNC; if (skb_copy_datagram_msg(skb, 0, msg, len)) { /* Exception. Bailout! 
*/ len = -EFAULT; break; } if (flags & MSG_TRUNC) len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; } while (1); out: release_sock(sk); return len; } EXPORT_SYMBOL_GPL(dccp_recvmsg); int inet_dccp_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; int err; lock_sock(sk); err = -EINVAL; if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) goto out; old_state = sk->sk_state; if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) goto out; WRITE_ONCE(sk->sk_max_ack_backlog, backlog); /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ if (old_state != DCCP_LISTEN) { struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; /* do not start to listen if feature negotiation setup fails */ if (dccp_feat_finalise_settings(dp)) { err = -EPROTO; goto out; } err = inet_csk_listen_start(sk); if (err) goto out; } err = 0; out: release_sock(sk); return err; } EXPORT_SYMBOL_GPL(inet_dccp_listen); static void dccp_terminate_connection(struct sock *sk) { u8 next_state = DCCP_CLOSED; switch (sk->sk_state) { case DCCP_PASSIVE_CLOSE: case DCCP_PASSIVE_CLOSEREQ: dccp_finish_passive_close(sk); break; case DCCP_PARTOPEN: dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); fallthrough; case DCCP_OPEN: dccp_send_close(sk, 1); if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER && !dccp_sk(sk)->dccps_server_timewait) next_state = DCCP_ACTIVE_CLOSEREQ; else next_state = DCCP_CLOSING; fallthrough; default: dccp_set_state(sk, next_state); } } void dccp_close(struct sock *sk, long timeout) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; u32 data_was_unread = 0; int state; lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == DCCP_LISTEN) { dccp_set_state(sk, DCCP_CLOSED); /* Special case. 
*/ inet_csk_listen_stop(sk); goto adjudge_to_death; } sk_stop_timer(sk, &dp->dccps_xmit_timer); /* * We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the *reader process may not have drained the data yet! */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { data_was_unread += skb->len; __kfree_skb(skb); } /* If socket has been already reset kill it. */ if (sk->sk_state == DCCP_CLOSED) goto adjudge_to_death; if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); dccp_set_state(sk, DCCP_CLOSED); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (sk->sk_state != DCCP_CLOSED) { /* * Normal connection termination. May need to wait if there are * still packets in the TX queue that are delayed by the CCID. */ dccp_flush_write_queue(sk, &timeout); dccp_terminate_connection(sk); } /* * Flush write queue. This may be necessary in several cases: * - we have been closed by the peer but still have application data; * - abortive termination (unread data or zero linger time), * - normal termination but queue could not be flushed within time limit */ __skb_queue_purge(&sk->sk_write_queue); sk_stream_wait_close(sk, timeout); adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); /* * It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); /* * Now socket is owned by kernel and we acquire BH lock * to finish close. No need to check for user refs. */ local_bh_disable(); bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); this_cpu_inc(dccp_orphan_count); /* Have we already been destroyed by a softirq or backlog? 
*/ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) goto out; if (sk->sk_state == DCCP_CLOSED) inet_csk_destroy_sock(sk); /* Otherwise, socket is reprieved until protocol close. */ out: bh_unlock_sock(sk); local_bh_enable(); sock_put(sk); } EXPORT_SYMBOL_GPL(dccp_close); void dccp_shutdown(struct sock *sk, int how) { dccp_pr_debug("called shutdown(%x)\n", how); } EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int __init dccp_mib_init(void) { dccp_statistics = alloc_percpu(struct dccp_mib); if (!dccp_statistics) return -ENOMEM; return 0; } static inline void dccp_mib_exit(void) { free_percpu(dccp_statistics); } static int thash_entries; module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG bool dccp_debug; module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); #endif static int __init dccp_init(void) { unsigned long goal; unsigned long nr_pages = totalram_pages(); int ehash_order, bhash_order, i; int rc; BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > sizeof_field(struct sk_buff, cb)); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) goto out_fail; rc = -ENOBUFS; dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_free_hashinfo2; dccp_hashinfo.bind2_bucket_cachep = kmem_cache_create("dccp_bind2_bucket", sizeof(struct inet_bind2_bucket), 0, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!dccp_hashinfo.bind2_bucket_cachep) goto out_free_bind_bucket_cachep; /* * Size and allocate the main established and bind bucket * hash tables. * * The methodology is similar to that of the buffer cache. 
*/ if (nr_pages >= (128 * 1024)) goal = nr_pages >> (21 - PAGE_SHIFT); else goal = nr_pages >> (23 - PAGE_SHIFT); if (thash_entries) goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); while (hash_size & (hash_size - 1)) hash_size--; dccp_hashinfo.ehash_mask = hash_size - 1; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { DCCP_CRIT("Failed to allocate DCCP established hash table"); goto out_free_bind2_bucket_cachep; } for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); if (inet_ehash_locks_alloc(&dccp_hashinfo)) goto out_free_dccp_ehash; bhash_order = ehash_order; do { dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / sizeof(struct inet_bind_hashbucket); if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { DCCP_CRIT("Failed to allocate DCCP bind hash table"); goto out_free_dccp_locks; } dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *) __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order); if (!dccp_hashinfo.bhash2) { DCCP_CRIT("Failed to allocate DCCP bind2 hash table"); goto out_free_dccp_bhash; } for (i = 0; i < dccp_hashinfo.bhash_size; i++) { spin_lock_init(&dccp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); spin_lock_init(&dccp_hashinfo.bhash2[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); } dccp_hashinfo.pernet = false; rc = dccp_mib_init(); if (rc) goto out_free_dccp_bhash2; rc = dccp_ackvec_init(); if (rc) goto 
out_free_dccp_mib; rc = dccp_sysctl_init(); if (rc) goto out_ackvec_exit; rc = ccid_initialize_builtins(); if (rc) goto out_sysctl_exit; dccp_timestamping_init(); return 0; out_sysctl_exit: dccp_sysctl_exit(); out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: dccp_mib_exit(); out_free_dccp_bhash2: free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); out_free_bind2_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep); out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_hashinfo2: inet_hashinfo2_free_mod(&dccp_hashinfo); out_fail: dccp_hashinfo.bhash = NULL; dccp_hashinfo.bhash2 = NULL; dccp_hashinfo.ehash = NULL; dccp_hashinfo.bind_bucket_cachep = NULL; dccp_hashinfo.bind2_bucket_cachep = NULL; return rc; } static void __exit dccp_fini(void) { int bhash_order = get_order(dccp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); ccid_cleanup_builtins(); dccp_mib_exit(); free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); free_pages((unsigned long)dccp_hashinfo.ehash, get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); inet_hashinfo2_free_mod(&dccp_hashinfo); } module_init(dccp_init); module_exit(dccp_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); |
15075 16746 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | /* SPDX-License-Identifier: GPL-2.0 */ /* * generic net pointers */ #ifndef __NET_GENERIC_H__ #define __NET_GENERIC_H__ #include <linux/bug.h> #include <linux/rcupdate.h> #include <net/net_namespace.h> /* * Generic net pointers are to be used by modules to put some private * stuff on the struct net without explicit struct net modification * * The rules are simple: * 1. set pernet_operations->id. After register_pernet_device you * will have the id of your private pointer. * 2. set pernet_operations->size to have the code allocate and free * a private structure pointed to from struct net. * 3. do not change this pointer while the net is alive; * 4. do not try to have any private reference on the net_generic object. * * After accomplishing all of the above, the private pointer can be * accessed with the net_generic() call. */ struct net_generic { union { struct { unsigned int len; struct rcu_head rcu; } s; DECLARE_FLEX_ARRAY(void *, ptr); }; }; static inline void *net_generic(const struct net *net, unsigned int id) { struct net_generic *ng; void *ptr; rcu_read_lock(); ng = rcu_dereference(net->gen); ptr = ng->ptr[id]; rcu_read_unlock(); return ptr; } #endif |
4 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #ifndef __XFS_REFCOUNT_H__ #define __XFS_REFCOUNT_H__ struct xfs_trans; struct xfs_mount; struct xfs_perag; struct xfs_btree_cur; struct xfs_bmbt_irec; struct xfs_refcount_irec; extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); static inline uint32_t xfs_refcount_encode_startblock( xfs_agblock_t startblock, enum xfs_refc_domain domain) { uint32_t start; /* * low level btree operations need to handle the generic btree range * query functions (which set rc_domain == -1U), so we check that the * domain is /not/ shared. 
*/ start = startblock & ~XFS_REFC_COWFLAG; if (domain != XFS_REFC_DOMAIN_SHARED) start |= XFS_REFC_COWFLAG; return start; } enum xfs_refcount_intent_type { XFS_REFCOUNT_INCREASE = 1, XFS_REFCOUNT_DECREASE, XFS_REFCOUNT_ALLOC_COW, XFS_REFCOUNT_FREE_COW, }; struct xfs_refcount_intent { struct list_head ri_list; struct xfs_perag *ri_pag; enum xfs_refcount_intent_type ri_type; xfs_extlen_t ri_blockcount; xfs_fsblock_t ri_startblock; }; /* Check that the refcount is appropriate for the record domain. */ static inline bool xfs_refcount_check_domain( const struct xfs_refcount_irec *irec) { if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) return false; if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) return false; return true; } void xfs_refcount_update_get_group(struct xfs_mount *mp, struct xfs_refcount_intent *ri); void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); void xfs_refcount_decrease_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); extern int xfs_refcount_finish_one(struct xfs_trans *tp, struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur); extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_end_of_shared); void xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len); void xfs_refcount_free_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len); extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, struct xfs_perag *pag); /* * While we're adjusting the refcounts records of an extent, we have * to keep an eye on the number of extents we're dirtying -- run too * many in a single transaction and we'll exceed the transaction's * reservation and crash the fs. 
Each record adds 12 bytes to the * log (plus any key updates) so we'll conservatively assume 32 bytes * per record. We must also leave space for btree splits on both ends * of the range and space for the CUD and a new CUI. * * Each EFI that we attach to the transaction is assumed to consume ~32 bytes. * This is a low estimate for an EFI tracking a single extent (16 bytes for the * EFI header, 16 for the extent, and 12 for the xlog op header), but the * estimate is acceptable if there's more than one extent being freed. * In the worst case of freeing every other block during a refcount decrease * operation, we amortize the space used for one EFI log item across 16 * extents. */ #define XFS_REFCOUNT_ITEM_OVERHEAD 32 extern int xfs_refcount_has_records(struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, enum xbtree_recpacking *outcome); union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); xfs_failaddr_t xfs_refcount_check_irec(struct xfs_btree_cur *cur, const struct xfs_refcount_irec *irec); extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); extern struct kmem_cache *xfs_refcount_intent_cache; int __init xfs_refcount_intent_init_cache(void); void xfs_refcount_intent_destroy_cache(void); #endif /* __XFS_REFCOUNT_H__ */ |
16 16 5 12 12 3 3 4 7 1 1 2 4 14 5 1 1 15 7 4 4 4 7 3 4 16 4 12 15 5 4 5 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 2005-2006 Ian Kent <raven@themaw.net> */ #include 
<linux/seq_file.h> #include <linux/pagemap.h> #include "autofs_i.h" struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi) { struct autofs_info *ino; ino = kzalloc(sizeof(*ino), GFP_KERNEL); if (ino) { INIT_LIST_HEAD(&ino->active); INIT_LIST_HEAD(&ino->expiring); ino->last_used = jiffies; ino->sbi = sbi; ino->count = 1; } return ino; } void autofs_clean_ino(struct autofs_info *ino) { ino->uid = GLOBAL_ROOT_UID; ino->gid = GLOBAL_ROOT_GID; ino->last_used = jiffies; } void autofs_free_ino(struct autofs_info *ino) { kfree_rcu(ino, rcu); } void autofs_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs_sbi(sb); /* * In the event of a failure in get_sb_nodev the superblock * info is not present so nothing else has been setup, so * just call kill_anon_super when we are called from * deactivate_super. */ if (sbi) { /* Free wait queues, close pipe */ autofs_catatonic_mode(sbi); put_pid(sbi->oz_pgrp); } pr_debug("shutting down\n"); kill_litter_super(sb); if (sbi) kfree_rcu(sbi, rcu); } static int autofs_show_options(struct seq_file *m, struct dentry *root) { struct autofs_sb_info *sbi = autofs_sbi(root->d_sb); struct inode *root_inode = d_inode(root->d_sb->s_root); if (!sbi) return 0; seq_printf(m, ",fd=%d", sbi->pipefd); if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, root_inode->i_uid)); if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, root_inode->i_gid)); seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp)); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); if (autofs_type_offset(sbi->type)) seq_puts(m, ",offset"); else if (autofs_type_direct(sbi->type)) seq_puts(m, ",direct"); else seq_puts(m, ",indirect"); if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE) seq_puts(m, ",strictexpire"); if (sbi->flags & AUTOFS_SBI_IGNORE) seq_puts(m, ",ignore"); 
#ifdef CONFIG_CHECKPOINT_RESTORE if (sbi->pipe) seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino); else seq_puts(m, ",pipe_ino=-1"); #endif return 0; } static void autofs_evict_inode(struct inode *inode) { clear_inode(inode); kfree(inode->i_private); } static const struct super_operations autofs_sops = { .statfs = simple_statfs, .show_options = autofs_show_options, .evict_inode = autofs_evict_inode, }; enum { Opt_direct, Opt_fd, Opt_gid, Opt_ignore, Opt_indirect, Opt_maxproto, Opt_minproto, Opt_offset, Opt_pgrp, Opt_strictexpire, Opt_uid, }; const struct fs_parameter_spec autofs_param_specs[] = { fsparam_flag ("direct", Opt_direct), fsparam_fd ("fd", Opt_fd), fsparam_u32 ("gid", Opt_gid), fsparam_flag ("ignore", Opt_ignore), fsparam_flag ("indirect", Opt_indirect), fsparam_u32 ("maxproto", Opt_maxproto), fsparam_u32 ("minproto", Opt_minproto), fsparam_flag ("offset", Opt_offset), fsparam_u32 ("pgrp", Opt_pgrp), fsparam_flag ("strictexpire", Opt_strictexpire), fsparam_u32 ("uid", Opt_uid), {} }; struct autofs_fs_context { kuid_t uid; kgid_t gid; int pgrp; bool pgrp_set; }; /* * Open the fd. We do it here rather than in get_tree so that it's done in the * context of the system call that passed the data and not the one that * triggered the superblock creation, lest the fd gets reassigned. 
*/ static int autofs_parse_fd(struct fs_context *fc, struct autofs_sb_info *sbi, struct fs_parameter *param, struct fs_parse_result *result) { struct file *pipe; int ret; if (param->type == fs_value_is_file) { /* came through the new api */ pipe = param->file; param->file = NULL; } else { pipe = fget(result->uint_32); } if (!pipe) { errorf(fc, "could not open pipe file descriptor"); return -EBADF; } ret = autofs_check_pipe(pipe); if (ret < 0) { errorf(fc, "Invalid/unusable pipe"); if (param->type != fs_value_is_file) fput(pipe); return -EBADF; } autofs_set_packet_pipe_flags(pipe); if (sbi->pipe) fput(sbi->pipe); sbi->pipefd = result->uint_32; sbi->pipe = pipe; return 0; } static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; kuid_t uid; kgid_t gid; int opt; opt = fs_parse(fc, autofs_param_specs, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_fd: return autofs_parse_fd(fc, sbi, param, &result); case Opt_uid: uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) return invalfc(fc, "Invalid uid"); ctx->uid = uid; break; case Opt_gid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) return invalfc(fc, "Invalid gid"); ctx->gid = gid; break; case Opt_pgrp: ctx->pgrp = result.uint_32; ctx->pgrp_set = true; break; case Opt_minproto: sbi->min_proto = result.uint_32; break; case Opt_maxproto: sbi->max_proto = result.uint_32; break; case Opt_indirect: set_autofs_type_indirect(&sbi->type); break; case Opt_direct: set_autofs_type_direct(&sbi->type); break; case Opt_offset: set_autofs_type_offset(&sbi->type); break; case Opt_strictexpire: sbi->flags |= AUTOFS_SBI_STRICTEXPIRE; break; case Opt_ignore: sbi->flags |= AUTOFS_SBI_IGNORE; } return 0; } static struct autofs_sb_info *autofs_alloc_sbi(void) { struct autofs_sb_info *sbi; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 
if (!sbi) return NULL; sbi->magic = AUTOFS_SBI_MAGIC; sbi->flags = AUTOFS_SBI_CATATONIC; sbi->min_proto = AUTOFS_MIN_PROTO_VERSION; sbi->max_proto = AUTOFS_MAX_PROTO_VERSION; sbi->pipefd = -1; set_autofs_type_indirect(&sbi->type); mutex_init(&sbi->wq_mutex); mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); spin_lock_init(&sbi->lookup_lock); INIT_LIST_HEAD(&sbi->active_list); INIT_LIST_HEAD(&sbi->expiring_list); return sbi; } static int autofs_validate_protocol(struct fs_context *fc) { struct autofs_sb_info *sbi = fc->s_fs_info; /* Test versions first */ if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { errorf(fc, "kernel does not match daemon version " "daemon (%d, %d) kernel (%d, %d)\n", sbi->min_proto, sbi->max_proto, AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); return -EINVAL; } /* Establish highest kernel protocol version */ if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION) sbi->version = AUTOFS_MAX_PROTO_VERSION; else sbi->version = sbi->max_proto; switch (sbi->version) { case 4: sbi->sub_version = 7; break; case 5: sbi->sub_version = AUTOFS_PROTO_SUBVERSION; break; default: sbi->sub_version = 0; } return 0; } static int autofs_fill_super(struct super_block *s, struct fs_context *fc) { struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = s->s_fs_info; struct inode *root_inode; struct autofs_info *ino; pr_debug("starting up, sbi = %p\n", sbi); sbi->sb = s; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = AUTOFS_SUPER_MAGIC; s->s_op = &autofs_sops; s->s_d_op = &autofs_dentry_operations; s->s_time_gran = 1; /* * Get the root inode and dentry, but defer checking for errors. 
*/ ino = autofs_new_ino(sbi); if (!ino) return -ENOMEM; root_inode = autofs_get_inode(s, S_IFDIR | 0755); if (!root_inode) return -ENOMEM; root_inode->i_uid = ctx->uid; root_inode->i_gid = ctx->gid; root_inode->i_fop = &autofs_root_operations; root_inode->i_op = &autofs_dir_inode_operations; s->s_root = d_make_root(root_inode); if (unlikely(!s->s_root)) { autofs_free_ino(ino); return -ENOMEM; } s->s_root->d_fsdata = ino; if (ctx->pgrp_set) { sbi->oz_pgrp = find_get_pid(ctx->pgrp); if (!sbi->oz_pgrp) return invalf(fc, "Could not find process group %d", ctx->pgrp); } else sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID); if (autofs_type_trigger(sbi->type)) /* s->s_root won't be contended so there's little to * be gained by not taking the d_lock when setting * d_flags, even when a lot mounts are being done. */ managed_dentry_set_managed(s->s_root); pr_debug("pipe fd = %d, pgrp = %u\n", sbi->pipefd, pid_nr(sbi->oz_pgrp)); sbi->flags &= ~AUTOFS_SBI_CATATONIC; return 0; } /* * Validate the parameters and then request a superblock. */ static int autofs_get_tree(struct fs_context *fc) { struct autofs_sb_info *sbi = fc->s_fs_info; int ret; ret = autofs_validate_protocol(fc); if (ret) return ret; if (sbi->pipefd < 0) return invalf(fc, "No control pipe specified"); return get_tree_nodev(fc, autofs_fill_super); } static void autofs_free_fc(struct fs_context *fc) { struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = fc->s_fs_info; if (sbi) { if (sbi->pipe) fput(sbi->pipe); kfree(sbi); } kfree(ctx); } static const struct fs_context_operations autofs_context_ops = { .free = autofs_free_fc, .parse_param = autofs_parse_param, .get_tree = autofs_get_tree, }; /* * Set up the filesystem mount context. 
*/ int autofs_init_fs_context(struct fs_context *fc) { struct autofs_fs_context *ctx; struct autofs_sb_info *sbi; ctx = kzalloc(sizeof(struct autofs_fs_context), GFP_KERNEL); if (!ctx) goto nomem; ctx->uid = current_uid(); ctx->gid = current_gid(); sbi = autofs_alloc_sbi(); if (!sbi) goto nomem_ctx; fc->fs_private = ctx; fc->s_fs_info = sbi; fc->ops = &autofs_context_ops; return 0; nomem_ctx: kfree(ctx); nomem: return -ENOMEM; } struct inode *autofs_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (inode == NULL) return NULL; inode->i_mode = mode; if (sb->s_root) { inode->i_uid = d_inode(sb->s_root)->i_uid; inode->i_gid = d_inode(sb->s_root)->i_gid; } simple_inode_init_ts(inode); inode->i_ino = get_next_ino(); if (S_ISDIR(mode)) { set_nlink(inode, 2); inode->i_op = &autofs_dir_inode_operations; inode->i_fop = &autofs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &autofs_symlink_inode_operations; } else WARN_ON(1); return inode; } |
64 64 20 32 24 24 20 1 2 35 35 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "main.h" #include <linux/atomic.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> #include <linux/crc32c.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/genetlink.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/kernel.h> #include <linux/kobject.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/minmax.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> #include <net/dsfield.h> #include <net/rtnetlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bat_iv_ogm.h" #include "bat_v.h" #include 
"bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" #include "hard-interface.h" #include "log.h" #include "multicast.h" #include "netlink.h" #include "network-coding.h" #include "originator.h" #include "routing.h" #include "send.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, * list traversals just rcu-locked */ struct list_head batadv_hardif_list; unsigned int batadv_hardif_generation; static int (*batadv_rx_handler[256])(struct sk_buff *skb, struct batadv_hard_iface *recv_if); unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct workqueue_struct *batadv_event_workqueue; static void batadv_recv_handler_init(void); #define BATADV_UEV_TYPE_VAR "BATTYPE=" #define BATADV_UEV_ACTION_VAR "BATACTION=" #define BATADV_UEV_DATA_VAR "BATDATA=" static char *batadv_uev_action_str[] = { "add", "del", "change", "loopdetect", }; static char *batadv_uev_type_str[] = { "gw", "bla", }; static int __init batadv_init(void) { int ret; ret = batadv_tt_cache_init(); if (ret < 0) return ret; INIT_LIST_HEAD(&batadv_hardif_list); batadv_algo_init(); batadv_recv_handler_init(); batadv_v_init(); batadv_iv_init(); batadv_nc_init(); batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); if (!batadv_event_workqueue) goto err_create_wq; register_netdevice_notifier(&batadv_hard_if_notifier); rtnl_link_register(&batadv_link_ops); batadv_netlink_register(); pr_info("B.A.T.M.A.N. 
advanced %s (compatibility version %i) loaded\n", BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); return 0; err_create_wq: batadv_tt_cache_destroy(); return -ENOMEM; } static void __exit batadv_exit(void) { batadv_netlink_unregister(); rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); destroy_workqueue(batadv_event_workqueue); batadv_event_workqueue = NULL; rcu_barrier(); batadv_tt_cache_destroy(); } /** * batadv_mesh_init() - Initialize soft interface * @soft_iface: netdev struct of the soft interface * * Return: 0 on success or negative error number in case of failure */ int batadv_mesh_init(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); int ret; spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); spin_lock_init(&bat_priv->tt.changes_list_lock); spin_lock_init(&bat_priv->tt.req_list_lock); spin_lock_init(&bat_priv->tt.roam_list_lock); spin_lock_init(&bat_priv->tt.last_changeset_lock); spin_lock_init(&bat_priv->tt.commit_lock); spin_lock_init(&bat_priv->gw.list_lock); #ifdef CONFIG_BATMAN_ADV_MCAST spin_lock_init(&bat_priv->mcast.mla_lock); spin_lock_init(&bat_priv->mcast.want_lists_lock); #endif spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); spin_lock_init(&bat_priv->softif_vlan_list_lock); spin_lock_init(&bat_priv->tp_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); INIT_HLIST_HEAD(&bat_priv->gw.gateway_list); #ifdef CONFIG_BATMAN_ADV_MCAST INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list); INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list); INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv6_list); #endif INIT_LIST_HEAD(&bat_priv->tt.changes_list); INIT_HLIST_HEAD(&bat_priv->tt.req_list); INIT_LIST_HEAD(&bat_priv->tt.roam_list); #ifdef CONFIG_BATMAN_ADV_MCAST INIT_HLIST_HEAD(&bat_priv->mcast.mla_list); #endif 
INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); INIT_HLIST_HEAD(&bat_priv->tp_list); bat_priv->gw.generation = 0; ret = batadv_originator_init(bat_priv); if (ret < 0) { atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); goto err_orig; } ret = batadv_tt_init(bat_priv); if (ret < 0) { atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); goto err_tt; } ret = batadv_v_mesh_init(bat_priv); if (ret < 0) { atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); goto err_v; } ret = batadv_bla_init(bat_priv); if (ret < 0) { atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); goto err_bla; } ret = batadv_dat_init(bat_priv); if (ret < 0) { atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); goto err_dat; } ret = batadv_nc_mesh_init(bat_priv); if (ret < 0) { atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); goto err_nc; } batadv_gw_init(bat_priv); batadv_mcast_init(bat_priv); atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); return 0; err_nc: batadv_dat_free(bat_priv); err_dat: batadv_bla_free(bat_priv); err_bla: batadv_v_mesh_free(bat_priv); err_v: batadv_tt_free(bat_priv); err_tt: batadv_originator_free(bat_priv); err_orig: batadv_purge_outstanding_packets(bat_priv, NULL); atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); return ret; } /** * batadv_mesh_free() - Deinitialize soft interface * @soft_iface: netdev struct of the soft interface */ void batadv_mesh_free(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); batadv_purge_outstanding_packets(bat_priv, NULL); batadv_gw_node_free(bat_priv); batadv_v_mesh_free(bat_priv); batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); batadv_mcast_free(bat_priv); /* Free the TT and the originator tables only 
after having terminated * all the other depending components which may use these structures for * their purposes. */ batadv_tt_free(bat_priv); /* Since the originator table clean up routine is accessing the TT * tables as well, it has to be invoked after the TT tables have been * freed and marked as empty. This ensures that no cleanup RCU callbacks * accessing the TT data are scheduled for later execution. */ batadv_originator_free(bat_priv); batadv_gw_free(bat_priv); free_percpu(bat_priv->bat_counters); bat_priv->bat_counters = NULL; atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } /** * batadv_is_my_mac() - check if the given mac address belongs to any of the * real interfaces in the current mesh * @bat_priv: the bat priv with all the soft interface information * @addr: the address to check * * Return: 'true' if the mac address was found, false otherwise. */ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) { const struct batadv_hard_iface *hard_iface; bool is_my_mac = false; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { is_my_mac = true; break; } } rcu_read_unlock(); return is_my_mac; } /** * batadv_max_header_len() - calculate maximum encapsulation overhead for a * payload packet * * Return: the maximum encapsulation overhead in bytes. 
*/ int batadv_max_header_len(void) { int header_len = 0; header_len = max_t(int, header_len, sizeof(struct batadv_unicast_packet)); header_len = max_t(int, header_len, sizeof(struct batadv_unicast_4addr_packet)); header_len = max_t(int, header_len, sizeof(struct batadv_bcast_packet)); #ifdef CONFIG_BATMAN_ADV_NC header_len = max_t(int, header_len, sizeof(struct batadv_coded_packet)); #endif return header_len + ETH_HLEN; } /** * batadv_skb_set_priority() - sets skb priority according to packet content * @skb: the packet to be sent * @offset: offset to the packet content * * This function sets a value between 256 and 263 (802.1d priority), which * can be interpreted by the cfg80211 or other drivers. */ void batadv_skb_set_priority(struct sk_buff *skb, int offset) { struct iphdr ip_hdr_tmp, *ip_hdr; struct ipv6hdr ip6_hdr_tmp, *ip6_hdr; struct ethhdr ethhdr_tmp, *ethhdr; struct vlan_ethhdr *vhdr, vhdr_tmp; u32 prio; /* already set, do nothing */ if (skb->priority >= 256 && skb->priority <= 263) return; ethhdr = skb_header_pointer(skb, offset, sizeof(*ethhdr), ðhdr_tmp); if (!ethhdr) return; switch (ethhdr->h_proto) { case htons(ETH_P_8021Q): vhdr = skb_header_pointer(skb, offset + sizeof(*vhdr), sizeof(*vhdr), &vhdr_tmp); if (!vhdr) return; prio = ntohs(vhdr->h_vlan_TCI) & VLAN_PRIO_MASK; prio = prio >> VLAN_PRIO_SHIFT; break; case htons(ETH_P_IP): ip_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr), sizeof(*ip_hdr), &ip_hdr_tmp); if (!ip_hdr) return; prio = (ipv4_get_dsfield(ip_hdr) & 0xfc) >> 5; break; case htons(ETH_P_IPV6): ip6_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr), sizeof(*ip6_hdr), &ip6_hdr_tmp); if (!ip6_hdr) return; prio = (ipv6_get_dsfield(ip6_hdr) & 0xfc) >> 5; break; default: return; } skb->priority = prio + 256; } static int batadv_recv_unhandled_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { kfree_skb(skb); return NET_RX_DROP; } /* incoming packets with the batman ethertype received on any active hard * interface 
*/ /** * batadv_batman_skb_recv() - Handle incoming message from an hard interface * @skb: the received packet * @dev: the net device that the packet was received on * @ptype: packet type of incoming packet (ETH_P_BATMAN) * @orig_dev: the original receive net device (e.g. bonded device) * * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure */ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct batadv_priv *bat_priv; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *hard_iface; u8 idx; hard_iface = container_of(ptype, struct batadv_hard_iface, batman_adv_ptype); /* Prevent processing a packet received on an interface which is getting * shut down otherwise the packet may trigger de-reference errors * further down in the receive path. */ if (!kref_get_unless_zero(&hard_iface->refcount)) goto err_out; skb = skb_share_check(skb, GFP_ATOMIC); /* skb was released by skb_share_check() */ if (!skb) goto err_put; /* packet should hold at least type and version */ if (unlikely(!pskb_may_pull(skb, 2))) goto err_free; /* expect a valid ethernet header here. 
*/ if (unlikely(skb->mac_len != ETH_HLEN || !skb_mac_header(skb))) goto err_free; if (!hard_iface->soft_iface) goto err_free; bat_priv = netdev_priv(hard_iface->soft_iface); if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto err_free; /* discard frames on not active interfaces */ if (hard_iface->if_status != BATADV_IF_ACTIVE) goto err_free; batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", batadv_ogm_packet->version); goto err_free; } /* reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); idx = batadv_ogm_packet->packet_type; (*batadv_rx_handler[idx])(skb, hard_iface); batadv_hardif_put(hard_iface); /* return NET_RX_SUCCESS in any case as we * most probably dropped the packet for * routing-logical reasons. */ return NET_RX_SUCCESS; err_free: kfree_skb(skb); err_put: batadv_hardif_put(hard_iface); err_out: return NET_RX_DROP; } static void batadv_recv_handler_init(void) { int i; for (i = 0; i < ARRAY_SIZE(batadv_rx_handler); i++) batadv_rx_handler[i] = batadv_recv_unhandled_packet; for (i = BATADV_UNICAST_MIN; i <= BATADV_UNICAST_MAX; i++) batadv_rx_handler[i] = batadv_recv_unhandled_unicast_packet; /* compile time checks for sizes */ BUILD_BUG_ON(sizeof(struct batadv_bla_claim_dst) != 6); BUILD_BUG_ON(sizeof(struct batadv_ogm_packet) != 24); BUILD_BUG_ON(sizeof(struct batadv_icmp_header) != 20); BUILD_BUG_ON(sizeof(struct batadv_icmp_packet) != 20); BUILD_BUG_ON(sizeof(struct batadv_icmp_packet_rr) != 116); BUILD_BUG_ON(sizeof(struct batadv_unicast_packet) != 10); BUILD_BUG_ON(sizeof(struct batadv_unicast_4addr_packet) != 18); BUILD_BUG_ON(sizeof(struct batadv_frag_packet) != 20); BUILD_BUG_ON(sizeof(struct batadv_bcast_packet) != 14); BUILD_BUG_ON(sizeof(struct batadv_coded_packet) != 46); BUILD_BUG_ON(sizeof(struct 
batadv_unicast_tvlv_packet) != 20); BUILD_BUG_ON(sizeof(struct batadv_tvlv_hdr) != 4); BUILD_BUG_ON(sizeof(struct batadv_tvlv_gateway_data) != 8); BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_vlan_data) != 8); BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12); BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8); i = sizeof_field(struct sk_buff, cb); BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; /* unicast packets ... */ /* unicast with 4 addresses packet */ batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet; /* unicast packet */ batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; /* unicast tvlv packet */ batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv; /* batman icmp packet */ batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; /* Fragmented packets */ batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet; } /** * batadv_recv_handler_register() - Register handler for batman-adv packet type * @packet_type: batadv_packettype which should be handled * @recv_handler: receive handler for the packet type * * Return: 0 on success or negative error number in case of failure */ int batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)) { int (*curr)(struct sk_buff *skb, struct batadv_hard_iface *recv_if); curr = batadv_rx_handler[packet_type]; if (curr != batadv_recv_unhandled_packet && curr != batadv_recv_unhandled_unicast_packet) return -EBUSY; batadv_rx_handler[packet_type] = recv_handler; return 0; } /** * batadv_recv_handler_unregister() - Unregister handler for packet type * @packet_type: batadv_packettype which should no longer be handled */ void batadv_recv_handler_unregister(u8 packet_type) { batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet; } /** * batadv_skb_crc32() - calculate CRC32 of the whole packet and skip bytes in * the header * 
@skb: skb pointing to fragmented socket buffers * @payload_ptr: Pointer to position inside the head buffer of the skb * marking the start of the data to be CRC'ed * * payload_ptr must always point to an address in the skb head buffer and not to * a fragment. * * Return: big endian crc32c of the checksummed data */ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) { u32 crc = 0; unsigned int from; unsigned int to = skb->len; struct skb_seq_state st; const u8 *data; unsigned int len; unsigned int consumed = 0; from = (unsigned int)(payload_ptr - skb->data); skb_prepare_seq_read(skb, from, to, &st); while ((len = skb_seq_read(consumed, &data, &st)) != 0) { crc = crc32c(crc, data, len); consumed += len; } return htonl(crc); } /** * batadv_get_vid() - extract the VLAN identifier from skb if any * @skb: the buffer containing the packet * @header_len: length of the batman header preceding the ethernet header * * Return: VID with the BATADV_VLAN_HAS_TAG flag when the packet embedded in the * skb is vlan tagged. Otherwise BATADV_NO_FLAGS. 
*/ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) { struct ethhdr *ethhdr = (struct ethhdr *)(skb->data + header_len); struct vlan_ethhdr *vhdr; unsigned short vid; if (ethhdr->h_proto != htons(ETH_P_8021Q)) return BATADV_NO_FLAGS; if (!pskb_may_pull(skb, header_len + VLAN_ETH_HLEN)) return BATADV_NO_FLAGS; vhdr = (struct vlan_ethhdr *)(skb->data + header_len); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; vid |= BATADV_VLAN_HAS_TAG; return vid; } /** * batadv_vlan_ap_isola_get() - return AP isolation status for the given vlan * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier for which the AP isolation attributed as to be * looked up * * Return: true if AP isolation is on for the VLAN identified by vid, false * otherwise */ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) { bool ap_isolation_enabled = false; struct batadv_softif_vlan *vlan; /* if the AP isolation is requested on a VLAN, then check for its * setting in the proper VLAN private data structure */ vlan = batadv_softif_vlan_get(bat_priv, vid); if (vlan) { ap_isolation_enabled = atomic_read(&vlan->ap_isolation); batadv_softif_vlan_put(vlan); } return ap_isolation_enabled; } /** * batadv_throw_uevent() - Send an uevent with batman-adv specific env data * @bat_priv: the bat priv with all the soft interface information * @type: subsystem type of event. Stored in uevent's BATTYPE * @action: action type of event. Stored in uevent's BATACTION * @data: string with additional information to the event (ignored for * BATADV_UEV_DEL). 
Stored in uevent's BATDATA * * Return: 0 on success or negative error number in case of failure */ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, enum batadv_uev_action action, const char *data) { int ret = -ENOMEM; struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; bat_kobj = &bat_priv->soft_iface->dev.kobj; uevent_env[0] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_TYPE_VAR, batadv_uev_type_str[type]); if (!uevent_env[0]) goto out; uevent_env[1] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_ACTION_VAR, batadv_uev_action_str[action]); if (!uevent_env[1]) goto out; /* If the event is DEL, ignore the data field */ if (action != BATADV_UEV_DEL) { uevent_env[2] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_DATA_VAR, data); if (!uevent_env[2]) goto out; } ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); out: kfree(uevent_env[0]); kfree(uevent_env[1]); kfree(uevent_env[2]); if (ret) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", batadv_uev_type_str[type], batadv_uev_action_str[action], (action == BATADV_UEV_DEL ? "NULL" : data), ret); return ret; } module_init(batadv_init); module_exit(batadv_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR(BATADV_DRIVER_AUTHOR); MODULE_DESCRIPTION(BATADV_DRIVER_DESC); MODULE_VERSION(BATADV_SOURCE_VERSION); MODULE_ALIAS_RTNL_LINK("batadv"); MODULE_ALIAS_GENL_FAMILY(BATADV_NL_NAME); |
4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | // SPDX-License-Identifier: GPL-2.0-only /* iptables module to match on related connections */ /* * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_helper.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Related connection matching"); MODULE_ALIAS("ipt_helper"); MODULE_ALIAS("ip6t_helper"); static bool helper_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; const struct nf_conn_help *master_help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; bool ret = info->invert; ct = nf_ct_get(skb, &ctinfo); if (!ct || !ct->master) return ret; master_help = nfct_help(ct->master); if (!master_help) return ret; /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(master_help->helper); if (!helper) return ret; if (info->name[0] == '\0') ret = !ret; else ret ^= !strncmp(helper->name, info->name, strlen(helper->name)); return ret; } static int helper_mt_check(const struct xt_mtchk_param *par) { struct xt_helper_info *info = par->matchinfo; int ret; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } info->name[sizeof(info->name) - 1] = '\0'; return 0; } static void 
helper_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match helper_mt_reg __read_mostly = { .name = "helper", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = helper_mt_check, .match = helper_mt, .destroy = helper_mt_destroy, .matchsize = sizeof(struct xt_helper_info), .me = THIS_MODULE, }; static int __init helper_mt_init(void) { return xt_register_match(&helper_mt_reg); } static void __exit helper_mt_exit(void) { xt_unregister_match(&helper_mt_reg); } module_init(helper_mt_init); module_exit(helper_mt_exit); |
16 397 396 142 447 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 
1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 
1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 
2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H /* * Define 'struct task_struct' and provide the main scheduler * APIs (schedule(), wakeup variants, etc.) 
*/ #include <uapi/linux/sched.h> #include <asm/current.h> #include <linux/pid.h> #include <linux/sem.h> #include <linux/shm.h> #include <linux/kmsan_types.h> #include <linux/mutex.h> #include <linux/plist.h> #include <linux/hrtimer.h> #include <linux/irqflags.h> #include <linux/seccomp.h> #include <linux/nodemask.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/resource.h> #include <linux/latencytop.h> #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> #include <linux/syscall_user_dispatch.h> #include <linux/mm_types_task.h> #include <linux/task_io_accounting.h> #include <linux/posix-timers.h> #include <linux/rseq.h> #include <linux/seqlock.h> #include <linux/kcsan.h> #include <linux/rv.h> #include <linux/livepatch_sched.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; struct capture_control; struct cfs_rq; struct fs_struct; struct futex_pi_state; struct io_context; struct io_uring_task; struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; struct robust_list_head; struct root_domain; struct rq; struct sched_attr; struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; struct user_event_mm; /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). * * We have two separate sets of flags: task->__state * is about runnability, while task->exit_state are * about the task exiting. Confusing, but this way * modifying one set can't modify the other one by * mistake. 
*/ /* Used in tsk->__state: */ #define TASK_RUNNING 0x00000000 #define TASK_INTERRUPTIBLE 0x00000001 #define TASK_UNINTERRUPTIBLE 0x00000002 #define __TASK_STOPPED 0x00000004 #define __TASK_TRACED 0x00000008 /* Used in tsk->exit_state: */ #define EXIT_DEAD 0x00000010 #define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->__state again: */ #define TASK_PARKED 0x00000040 #define TASK_DEAD 0x00000080 #define TASK_WAKEKILL 0x00000100 #define TASK_WAKING 0x00000200 #define TASK_NOLOAD 0x00000400 #define TASK_NEW 0x00000800 #define TASK_RTLOCK_WAIT 0x00001000 #define TASK_FREEZABLE 0x00002000 #define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) #define TASK_FROZEN 0x00008000 #define TASK_STATE_MAX 0x00010000 #define TASK_ANY (TASK_STATE_MAX-1) /* * DO NOT ADD ANY NEW USERS ! */ #define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED __TASK_TRACED #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) /* Convenience macros for the sake of wake_up(): */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). 
*/
/* Non-zero when @state has any of the stopped, traced, parked or dead bits set. */
#define is_special_task_state(state)				\
	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
/*
 * Debug variants: each records _THIS_IP_ in current->task_state_change.
 * The normal variant warns once if handed a special state, the special
 * variant warns once if handed a non-special one.
 */
# define debug_normal_state_change(state_value)				\
	do {								\
		WARN_ON_ONCE(is_special_task_state(state_value));	\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

# define debug_special_state_change(state_value)			\
	do {								\
		WARN_ON_ONCE(!is_special_task_state(state_value));	\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

/* Stash task_state_change in saved_state_change before overwriting it. */
# define debug_rtlock_wait_set_state()					\
	do {								 \
		current->saved_state_change = current->task_state_change;\
		current->task_state_change = _THIS_IP_;			 \
	} while (0)

/* Put back the task_state_change value stashed by the helper above. */
# define debug_rtlock_wait_restore_state()				\
	do {								 \
		current->task_state_change = current->saved_state_change;\
	} while (0)

#else
/* Without CONFIG_DEBUG_ATOMIC_SLEEP all four helpers are empty statements. */
# define debug_normal_state_change(cond)	do { } while (0)
# define debug_special_state_change(cond)	do { } while (0)
# define debug_rtlock_wait_set_state()		do { } while (0)
# define debug_rtlock_wait_restore_state()	do { } while (0)
#endif

/*
 * set_current_state() includes a barrier so that the write of current->__state
 * is correctly serialised wrt the caller's subsequent test of whether to
 * actually sleep:
 *
 *   for (;;) {
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	if (CONDITION)
 *	   break;
 *
 *	schedule();
 *   }
 *   __set_current_state(TASK_RUNNING);
 *
 * If the caller does not need such serialisation (because, for instance, the
 * CONDITION test and condition change and wakeup are under the same lock) then
 * use __set_current_state().
 *
 * The above is typically ordered against the wakeup, which does:
 *
 *   CONDITION = 1;
 *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
 *
 * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
 * accessing p->__state.
 *
 * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
* * However, with slightly different timing the wakeup TASK_RUNNING store can * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not * a problem either because that will result in one extra go around the loop * and our @cond test will save the day. * * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ smp_store_mb(current->__state, (state_value)); \ } while (0) /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must * serialize against wakeups such that any possible in-flight TASK_RUNNING * stores will not collide with our state change. */ #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ debug_special_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) /* * PREEMPT_RT specific variants for "sleeping" spin/rwlocks * * RT's spin/rwlock substitutions are state preserving. The state of the * task when blocking on the lock is saved in task_struct::saved_state and * restored after the lock has been acquired. These operations are * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT * lock related wakeups while the task is blocked on the lock are * redirected to operate on task_struct::saved_state to ensure that these * are not dropped. On restore task_struct::saved_state is set to * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. 
* * The lock operation looks like this: * * current_save_and_set_rtlock_wait_state(); * for (;;) { * if (try_lock()) * break; * raw_spin_unlock_irq(&lock->wait_lock); * schedule_rtlock(); * raw_spin_lock_irq(&lock->wait_lock); * set_current_state(TASK_RTLOCK_WAIT); * } * current_restore_rtlock_saved_state(); */ #define current_save_and_set_rtlock_wait_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(¤t->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(¤t->pi_lock); \ } while (0); #define current_restore_rtlock_saved_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(¤t->pi_lock); \ debug_rtlock_wait_restore_state(); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(¤t->pi_lock); \ } while (0); #define get_current_state() READ_ONCE(current->__state) /* * Define the task command name length as enum, then it can be visible to * BPF programs. 
*/
enum {
	TASK_COMM_LEN = 16,
};

extern void scheduler_tick(void);

/* Largest value of the 'long timeout' argument taken by the functions below. */
#define	MAX_SCHEDULE_TIMEOUT		LONG_MAX

extern long schedule_timeout(long timeout);
extern long schedule_timeout_interruptible(long timeout);
extern long schedule_timeout_killable(long timeout);
extern long schedule_timeout_uninterruptible(long timeout);
extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
asmlinkage void preempt_schedule_irq(void);
#ifdef CONFIG_PREEMPT_RT
/* Only declared for PREEMPT_RT builds. */
 extern void schedule_rtlock(void);
#endif

/*
 * io_schedule_prepare() returns a token that must not be ignored
 * (__must_check); io_schedule_finish() takes such a token.
 */
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);

/**
 * struct prev_cputime - snapshot of system and user cputime
 * @utime: time spent in user mode
 * @stime: time spent in system mode
 * @lock: protects the above two fields
 *
 * Stores previous user/system time values such that we can guarantee
 * monotonicity.
 */
struct prev_cputime {
	/* The struct has no members when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is set. */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	u64				utime;
	u64				stime;
	raw_spinlock_t			lock;
#endif
};

/* Values stored in vtime::state. */
enum vtime_state {
	/* Task is sleeping or running in a CPU with VTIME inactive: */
	VTIME_INACTIVE = 0,
	/* Task is idle */
	VTIME_IDLE,
	/* Task runs in kernelspace in a CPU with VTIME active: */
	VTIME_SYS,
	/* Task runs in userspace in a CPU with VTIME active: */
	VTIME_USER,
	/* Task runs as guests in a CPU with VTIME active: */
	VTIME_GUEST,
};

/*
 * Per-task vtime bookkeeping; embedded in task_struct as 'vtime' under
 * CONFIG_VIRT_CPU_ACCOUNTING_GEN.
 * NOTE(review): seqcount presumably guards the remaining fields - confirm
 * against the vtime accounting code.
 */
struct vtime {
	seqcount_t		seqcount;
	unsigned long long	starttime;
	enum vtime_state	state;
	unsigned int		cpu;
	u64			utime;
	u64			stime;
	u64			gtime;
};

/*
 * Utilization clamp constraints.
* @UCLAMP_MIN:	Minimum utilization
 * @UCLAMP_MAX:	Maximum utilization
 * @UCLAMP_CNT:	Utilization clamp constraints count
 */
enum uclamp_id {
	UCLAMP_MIN = 0,
	UCLAMP_MAX,
	/* Not a clamp itself: number of entries above, usable as an array size. */
	UCLAMP_CNT
};

#ifdef CONFIG_SMP
/* Defined elsewhere; only declared on SMP builds. */
extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
#endif

/* Carries a single scheduling priority value. */
struct sched_param {
	int sched_priority;
};

/* Run/wait bookkeeping; has no members unless CONFIG_SCHED_INFO is set. */
struct sched_info {
#ifdef CONFIG_SCHED_INFO
	/* Cumulative counters: */

	/* # of times we have run on this CPU: */
	unsigned long			pcount;

	/* Time spent waiting on a runqueue: */
	unsigned long long		run_delay;

	/* Timestamps: */

	/* When did we last run on a CPU? */
	unsigned long long		last_arrival;

	/* When were we last queued to run? */
	unsigned long long		last_queued;

#endif /* CONFIG_SCHED_INFO */
};

/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
/* SCHED_FIXEDPOINT_SCALE is 1L << 10, i.e. 1024. */
# define SCHED_FIXEDPOINT_SHIFT		10
# define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)

/* Increase resolution of cpu_capacity calculations */
# define SCHED_CAPACITY_SHIFT		SCHED_FIXEDPOINT_SHIFT
# define SCHED_CAPACITY_SCALE		(1L << SCHED_CAPACITY_SHIFT)

/*
 * Weight of a scheduling entity (see sched_entity::load).
 * NOTE(review): inv_weight presumably caches a precomputed inverse of
 * weight - confirm against kernel/sched.
 */
struct load_weight {
	unsigned long			weight;
	u32				inv_weight;
};

/**
 * struct util_est - Estimation utilization of FAIR tasks
 * @enqueued: instantaneous estimated utilization of a task/cpu
 * @ewma:     the Exponential Weighted Moving Average (EWMA)
 *            utilization of a task
 *
 * Support data structure to track an Exponential Weighted Moving Average
 * (EWMA) of a FAIR task's utilization. New samples are added to the moving
 * average each time a task completes an activation. Sample's weight is chosen
 * so that the EWMA will be relatively insensitive to transient changes to the
 * task's workload.
*
 * The enqueued attribute has a slightly different meaning for tasks and cpus:
 * - task:   the task's util_avg at last task dequeue time
 * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
 * Thus, the util_est.enqueued of a task represents the contribution on the
 * estimated utilization of the CPU where that task is currently enqueued.
 *
 * Only for tasks we track a moving average of the past instantaneous
 * estimated utilization. This allows to absorb sporadic drops in utilization
 * of an otherwise almost periodic task.
 *
 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
 * updates. When a task is dequeued, its util_est should not be updated if its
 * util_avg has not been updated in the meantime.
 * This information is mapped into the MSB bit of util_est.enqueued at dequeue
 * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
 * for a task) it is safe to use MSB.
 */
struct util_est {
	unsigned int			enqueued;
	unsigned int			ewma;
	/* Constants that go with this struct; UTIL_AVG_UNCHANGED is bit 31. */
#define UTIL_EST_WEIGHT_SHIFT		2
#define UTIL_AVG_UNCHANGED		0x80000000
	/* The whole struct is aligned to sizeof(u64). */
} __attribute__((__aligned__(sizeof(u64))));

/*
 * The load/runnable/util_avg accumulates an infinite geometric series
 * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
 *
 * [load_avg definition]
 *
 *   load_avg = runnable% * scale_load_down(load)
 *
 * [runnable_avg definition]
 *
 *   runnable_avg = runnable% * SCHED_CAPACITY_SCALE
 *
 * [util_avg definition]
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE
 *
 * where runnable% is the time ratio that a sched_entity is runnable and
 * running% the time ratio that a sched_entity is running.
 *
 * For cfs_rq, they are the aggregated values of all runnable and blocked
 * sched_entities.
 *
 * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
 * capacity scaling.
The scaling is done through the rq_clock_pelt that is used
 * for computing those signals (see update_rq_clock_pelt())
 *
 * N.B., the above ratios (runnable% and running%) themselves are in the
 * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
 * to as large a range as necessary. This is for example reflected by
 * util_avg's SCHED_CAPACITY_SCALE.
 *
 * [Overflow issue]
 *
 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
 * with the highest load (=88761), always runnable on a single cfs_rq,
 * and should not overflow as the number already hits PID_MAX_LIMIT.
 *
 * For all other cases (including 32-bit kernels), struct load_weight's
 * weight will overflow first before we do, because:
 *
 *    Max(load_avg) <= Max(load.weight)
 *
 * Then it is the load_weight's responsibility to consider overflow
 * issues.
 */
struct sched_avg {
	u64				last_update_time;
	u64				load_sum;
	u64				runnable_sum;
	u32				util_sum;
	u32				period_contrib;
	unsigned long			load_avg;
	unsigned long			runnable_avg;
	unsigned long			util_avg;
	struct util_est			util_est;
} ____cacheline_aligned;

/*
 * Scheduler statistics counters. The struct has no members unless
 * CONFIG_SCHEDSTATS is set; core_forceidle_sum additionally requires
 * CONFIG_SCHED_CORE.
 */
struct sched_statistics {
#ifdef CONFIG_SCHEDSTATS
	u64				wait_start;
	u64				wait_max;
	u64				wait_count;
	u64				wait_sum;
	u64				iowait_count;
	u64				iowait_sum;

	u64				sleep_start;
	u64				sleep_max;
	s64				sum_sleep_runtime;

	u64				block_start;
	u64				block_max;
	s64				sum_block_runtime;

	u64				exec_max;
	u64				slice_max;

	u64				nr_migrations_cold;
	u64				nr_failed_migrations_affine;
	u64				nr_failed_migrations_running;
	u64				nr_failed_migrations_hot;
	u64				nr_forced_migrations;

	u64				nr_wakeups;
	u64				nr_wakeups_sync;
	u64				nr_wakeups_migrate;
	u64				nr_wakeups_local;
	u64				nr_wakeups_remote;
	u64				nr_wakeups_affine;
	u64				nr_wakeups_affine_attempts;
	u64				nr_wakeups_passive;
	u64				nr_wakeups_idle;

#ifdef CONFIG_SCHED_CORE
	u64				core_forceidle_sum;
#endif
#endif /* CONFIG_SCHEDSTATS */
} ____cacheline_aligned;

/*
 * Scheduling entity; embedded in task_struct as 'se'. The parent/cfs_rq/my_q
 * group fields exist only with CONFIG_FAIR_GROUP_SCHED, and the load-tracking
 * 'avg' member only with CONFIG_SMP.
 */
struct sched_entity {
	/* For load-balancing: */
	struct load_weight		load;
	struct rb_node			run_node;
	u64				deadline;
	u64				min_deadline;

	struct list_head		group_node;
	unsigned int			on_rq;

	u64				exec_start;
	u64				sum_exec_runtime;
	u64				prev_sum_exec_runtime;
	u64				vruntime;
	s64				vlag;
	u64				slice;

	u64				nr_migrations;

#ifdef CONFIG_FAIR_GROUP_SCHED
	int				depth;
	struct sched_entity		*parent;
	/* rq on which this entity is (to be) queued: */
	struct cfs_rq			*cfs_rq;
	/* rq "owned" by this entity/group: */
	struct cfs_rq			*my_q;
	/* cached value of my_q->h_nr_running */
	unsigned long			runnable_weight;
#endif

#ifdef CONFIG_SMP
	/*
	 * Per entity load average tracking.
	 *
	 * Put into separate cache line so it does not
	 * collide with read-mostly values above.
	 */
	struct sched_avg		avg;
#endif
};

/*
 * Scheduling entity embedded in task_struct as 'rt'. Marked
 * __randomize_layout, so code must not depend on the member order.
 */
struct sched_rt_entity {
	struct list_head		run_list;
	unsigned long			timeout;
	unsigned long			watchdog_stamp;
	unsigned int			time_slice;
	unsigned short			on_rq;
	unsigned short			on_list;

	struct sched_rt_entity		*back;
#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity		*parent;
	/* rq on which this entity is (to be) queued: */
	struct rt_rq			*rt_rq;
	/* rq "owned" by this entity/group: */
	struct rt_rq			*my_q;
#endif
} __randomize_layout;

/* Scheduling entity for -deadline tasks; embedded in task_struct as 'dl'. */
struct sched_dl_entity {
	struct rb_node			rb_node;

	/*
	 * Original scheduling parameters. Copied here from sched_attr
	 * during sched_setattr(), they will remain the same until
	 * the next sched_setattr().
	 */
	u64				dl_runtime;	/* Maximum runtime for each instance	*/
	u64				dl_deadline;	/* Relative deadline of each instance	*/
	u64				dl_period;	/* Separation of two instances (period) */
	u64				dl_bw;		/* dl_runtime / dl_period		*/
	u64				dl_density;	/* dl_runtime / dl_deadline		*/

	/*
	 * Actual scheduling parameters. Initialized with the values above,
	 * they are continuously updated during task execution. Note that
	 * the remaining runtime could be < 0 in case we are in overrun.
	 */
	s64				runtime;	/* Remaining runtime for this instance	*/
	u64				deadline;	/* Absolute deadline for this instance	*/
	unsigned int			flags;		/* Specifying the scheduler behaviour	*/

	/*
	 * Some bool flags:
	 *
	 * @dl_throttled tells if we exhausted the runtime. If so, the
	 * task has to wait for a replenishment to be performed at the
	 * next firing of dl_timer.
	 *
	 * @dl_yielded tells if task gave up the CPU before consuming
	 * all its available runtime during the last job.
	 *
	 * @dl_non_contending tells if the task is inactive while still
	 * contributing to the active utilization. In other words, it
	 * indicates if the inactive timer has been armed and its handler
	 * has not been executed yet. This flag is useful to avoid race
	 * conditions between the inactive timer handler and the wakeup
	 * code.
	 *
	 * @dl_overrun tells if the task asked to be informed about runtime
	 * overruns.
	 */
	unsigned int			dl_throttled      : 1;
	unsigned int			dl_yielded        : 1;
	unsigned int			dl_non_contending : 1;
	unsigned int			dl_overrun	  : 1;

	/*
	 * Bandwidth enforcement timer. Each -deadline task has its
	 * own bandwidth to be enforced, thus we need one timer per task.
	 */
	struct hrtimer			dl_timer;

	/*
	 * Inactive timer, responsible for decreasing the active utilization
	 * at the "0-lag time". When a -deadline task blocks, it contributes
	 * to GRUB's active utilization until the "0-lag time", hence a
	 * timer is needed to decrease the active utilization at the correct
	 * time.
	 */
	struct hrtimer inactive_timer;

#ifdef CONFIG_RT_MUTEXES
	/*
	 * Priority Inheritance. When a DEADLINE scheduling entity is boosted
	 * pi_se points to the donor, otherwise points to the dl_se it belongs
	 * to (the original one/itself).
	 */
	struct sched_dl_entity *pi_se;
#endif
};

#ifdef CONFIG_UCLAMP_TASK
/* Number of utilization clamp buckets (shorter alias) */
#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT

/*
 * Utilization clamp for a scheduling entity
 * @value:		clamp value "assigned" to a se
 * @bucket_id:		bucket index corresponding to the "assigned" value
 * @active:		the se is currently refcounted in a rq's bucket
 * @user_defined:	the requested clamp value comes from user-space
 *
 * The bucket_id is the index of the clamp bucket matching the clamp value
 * which is pre-computed and stored to avoid expensive integer divisions from
 * the fast path.
 *
 * The active bit is set whenever a task has got an "effective" value assigned,
 * which can be different from the clamp value "requested" from user-space.
 * This allows to know a task is refcounted in the rq's bucket corresponding
 * to the "effective" bucket_id.
 *
 * The user_defined bit is set whenever a task has got a task-specific clamp
 * value requested from userspace, i.e. the system defaults apply to this task
 * just as a restriction. This allows to relax default clamps when a less
 * restrictive task-specific value has been requested, thus allowing to
 * implement a "nice" semantic. For example, a task running with a 20%
 * default boost can still drop its own boosting to 0%.
 */
struct uclamp_se {
	/* Bitfield widths are derived with bits_per() from the maximum values. */
	unsigned int value		: bits_per(SCHED_CAPACITY_SCALE);
	unsigned int bucket_id		: bits_per(UCLAMP_BUCKETS);
	unsigned int active		: 1;
	unsigned int user_defined	: 1;
};
#endif /* CONFIG_UCLAMP_TASK */

/*
 * The four u8 members of 'b' overlay the single u32 's', so all of them can
 * be accessed at once through 's'.
 */
union rcu_special {
	struct {
		u8			blocked;
		u8			need_qs;
		u8			exp_hint; /* Hint for performance. */
		u8			need_mb; /* Readers need smp_mb(). */
	} b; /* Bits. */
	u32 s; /* Set of bits.
*/ }; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, perf_sw_context, perf_nr_task_contexts, }; struct wake_q_node { struct wake_q_node *next; }; struct kmap_ctrl { #ifdef CONFIG_KMAP_LOCAL int idx; pte_t pteval[KM_MAX_IDX]; #endif }; struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. */ struct thread_info thread_info; #endif unsigned int __state; /* saved state for "spinlock sleepers" */ unsigned int saved_state; /* * This begins the randomizable portion of task_struct. Only * scheduling-critical items should be added above here. */ randomized_struct_fields_start void *stack; refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; unsigned int ptrace; #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; /* * recent_used_cpu is initially set as the last CPU used by a task * that wakes affine another task. Waker/wakee relationships can * push tasks around a CPU where each wakeup moves to the next one. * Tracking a recently used CPU allows a quick search for a recently * used CPU that may be idle. */ int recent_used_cpu; int wake_cpu; #endif int on_rq; int prio; int static_prio; int normal_prio; unsigned int rt_priority; struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE struct rb_node core_node; unsigned long core_cookie; unsigned int core_occupation; #endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. * Must be updated with task_rq_lock() held. */ struct uclamp_se uclamp_req[UCLAMP_CNT]; /* * Effective clamp values used for a scheduling entity. * Must be updated with task_rq_lock() held. 
*/ struct uclamp_se uclamp[UCLAMP_CNT]; #endif struct sched_statistics stats; #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif unsigned int policy; int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; #ifdef CONFIG_SMP unsigned short migration_disabled; #endif unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU unsigned long rcu_tasks_nvcsw; u8 rcu_tasks_holdout; u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; int trc_ipi_to_cpu; union rcu_special trc_reader_special; struct list_head trc_holdout_list; struct list_head trc_blkd_node; int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; struct list_head tasks; #ifdef CONFIG_SMP struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; #endif struct mm_struct *mm; struct mm_struct *active_mm; struct address_space *faults_disabled_mapping; int exit_state; int exit_code; int exit_signal; /* The signal sent when the parent dies: */ int pdeath_signal; /* JOBCTL_*, siglock protected: */ unsigned long jobctl; /* Used for emulating ABI behavior of previous Linux versions: */ unsigned int personality; /* Scheduler bits, serialized by scheduler locks: */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; /* Force alignment to the next boundary: */ unsigned :0; /* Unserialized, strictly 'current' */ /* * This field must not be in the scheduler word above due to wakelist * queueing no longer being serialized by 
p->on_cpu. However: * * p->XXX = X; ttwu() * schedule() if (p->on_rq && ..) // false * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true * deactivate_task() ttwu_queue_wakelist()) * p->on_rq = 0; p->sched_remote_wakeup = Y; * * guarantees all stores of 'current' are visible before * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; #ifdef CONFIG_RT_MUTEXES unsigned sched_rt_mutex:1; #endif /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif #ifdef CONFIG_LRU_GEN /* whether the LRU algorithm may apply to this access */ unsigned in_lru_fault:1; #endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif #ifdef CONFIG_CGROUPS /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; /* task is frozen/stopped (used by the cgroup freezer) */ unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP unsigned use_memdelay:1; #endif #ifdef CONFIG_PSI /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif #ifdef CONFIG_PAGE_OWNER /* Used by page_owner=on to detect recursion in page tracking. */ unsigned in_page_owner:1; #endif #ifdef CONFIG_EVENTFD /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif #ifdef CONFIG_IOMMU_SVA unsigned pasid_activated:1; #endif #ifdef CONFIG_CPU_SUP_INTEL unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif unsigned long atomic_flags; /* Flags requiring atomic access. */ struct restart_block restart_block; pid_t pid; pid_t tgid; #ifdef CONFIG_STACKPROTECTOR /* Canary value for the -fstack-protector GCC feature: */ unsigned long stack_canary; #endif /* * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. 
(p->father can be replaced with * p->real_parent->pid) */ /* Real parent process: */ struct task_struct __rcu *real_parent; /* Recipient of SIGCHLD, wait4() reports: */ struct task_struct __rcu *parent; /* * Children/sibling form the list of natural children: */ struct list_head children; struct list_head sibling; struct task_struct *group_leader; /* * 'ptraced' is the list of tasks this task is using ptrace() on. * * This includes both natural children and PTRACE_ATTACH targets. * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ struct list_head ptraced; struct list_head ptrace_entry; /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; struct list_head thread_node; struct completion *vfork_done; /* CLONE_CHILD_SETTID: */ int __user *set_child_tid; /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; /* PF_KTHREAD | PF_IO_WORKER */ void *worker_private; u64 utime; u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME u64 utimescaled; u64 stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime; #endif #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif /* Context switch counts: */ unsigned long nvcsw; unsigned long nivcsw; /* Monotonic time in nsecs: */ u64 start_time; /* Boot based time in nsecs: */ u64 start_boottime; /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ unsigned long min_flt; unsigned long maj_flt; /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK struct posix_cputimers_work posix_cputimers_work; #endif /* Process credentials: */ /* Tracer's credentials at attach: */ const struct cred __rcu *ptracer_cred; /* Objective and real subjective task credentials (COW): */ const struct cred __rcu *real_cred; /* Effective (overridable) subjective task credentials (COW): */ const struct cred __rcu *cred; 
#ifdef CONFIG_KEYS /* Cached requested key. */ struct key *cached_requested_key; #endif /* * executable name, excluding path. * * - normally initialized setup_new_exec() * - access it with [gs]et_task_comm() * - lock it with task_lock() */ char comm[TASK_COMM_LEN]; struct nameidata *nameidata; #ifdef CONFIG_SYSVIPC struct sysv_sem sysvsem; struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; #endif /* Filesystem information: */ struct fs_struct *fs; /* Open file information: */ struct files_struct *files; #ifdef CONFIG_IO_URING struct io_uring_task *io_uring; #endif /* Namespaces: */ struct nsproxy *nsproxy; /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; struct callback_head *task_works; #ifdef CONFIG_AUDIT #ifdef CONFIG_AUDITSYSCALL struct audit_context *audit_context; #endif kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES /* Mutex deadlock detection: */ struct mutex_waiter *blocked_on; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif #ifdef 
CONFIG_TRACE_IRQFLAGS struct irqtrace_events irqtrace; unsigned int hardirq_threaded; u64 hardirq_chain_key; int softirqs_enabled; int softirq_context; int irq_config; #endif #ifdef CONFIG_PREEMPT_RT int softirq_disable_cnt; #endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; unsigned int lockdep_recursion; struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif #if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP) unsigned int in_ubsan; #endif /* Journalling filesystem info: */ void *journal_info; /* Stacked block device info: */ struct bio_list *bio_list; /* Stack plugging: */ struct blk_plug *plug; /* VM state: */ struct reclaim_state *reclaim_state; struct io_context *io_context; #ifdef CONFIG_COMPACTION struct capture_control *capture_control; #endif /* Ptrace state: */ unsigned long ptrace_message; kernel_siginfo_t *last_siginfo; struct task_io_accounting ioac; #ifdef CONFIG_PSI /* Pressure stall state */ unsigned int psi_flags; #endif #ifdef CONFIG_TASK_XACCT /* Accumulated RSS usage: */ u64 acct_rss_mem1; /* Accumulated virtual memory usage: */ u64 acct_vm_mem1; /* stime + utime since last update: */ u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif #ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct 
perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; unsigned long numa_migrate_retry; /* Migration stamp: */ u64 node_stamp; u64 last_task_numa_placement; u64 last_sum_exec_runtime; struct callback_head numa_work; /* * This pointer is only modified for current in syscall and * pagefault context (and for tasks being destroyed), so it can be read * from any of the following contexts: * - RCU read-side critical section * - current->numa_group from everywhere * - task's runqueue locked, task not running */ struct numa_group __rcu *numa_group; /* * numa_faults is an array split into four regions: * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer * in this precise order. * * faults_memory: Exponential decaying average of faults on a per-node * basis. Scheduling placement decisions are made based on these * counts. The values remain static for the duration of a PTE scan. * faults_cpu: Track the nodes the process was running on when a NUMA * hinting fault was incurred. * faults_memory_buffer and faults_cpu_buffer: Record faults per node * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ unsigned long *numa_faults; unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local or failed to migrate. 
The task scan * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_RSEQ struct rseq __user *rseq; u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically * with respect to preemption. */ unsigned long rseq_event_mask; #endif #ifdef CONFIG_SCHED_MM_CID int mm_cid; /* Current cid in mm */ int last_mm_cid; /* Most recent cid in mm */ int migrate_from_cpu; int mm_cid_active; /* Whether cid bitmap is active */ struct callback_head cid_work; #endif struct tlbflush_unmap_batch tlb_ubc; /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION int make_it_fail; unsigned int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for a dirty throttling pause: */ int nr_dirtied; int nr_dirtied_pause; /* Start of a write-and-pause period: */ unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif /* * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. 
*/ u64 timer_slack_ns; u64 default_timer_slack_ns; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events kcsan_save_irqtrace; #endif #ifdef CONFIG_KCSAN_WEAK_MEMORY int kcsan_stack_depth; #endif #endif #ifdef CONFIG_KMSAN struct kmsan_ctx kmsan_ctx; #endif #if IS_ENABLED(CONFIG_KUNIT) struct kunit *kunit_test; #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; int curr_ret_depth; /* Stack of return addresses for return function tracing: */ struct ftrace_ret_stack *ret_stack; /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; /* * Number of functions that haven't been traced * because of depth overrun: */ atomic_t trace_overrun; /* Pause tracing: */ atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* Bitmask and counter of trace recursion: */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV /* See kernel/kcov.c for more details. 
*/ /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; /* Buffer for coverage collection: */ void *kcov_area; /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; /* KCOV common handle for remote coverage collection: */ u64 kcov_handle; /* KCOV sequence number: */ int kcov_sequence; /* Collect coverage from softirq context: */ unsigned int kcov_softirq; #endif #ifdef CONFIG_MEMCG struct mem_cgroup *memcg_in_oom; gfp_t memcg_oom_gfp_mask; int memcg_oom_order; /* Number of pages to reclaim on returning to userland: */ unsigned int memcg_nr_pages_over_high; /* Used by memcontrol for targeted memcg charge: */ struct mem_cgroup *active_memcg; #endif #ifdef CONFIG_MEMCG_KMEM struct obj_cgroup *objcg; #endif #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) unsigned int sequential_io; unsigned int sequential_io_avg; #endif struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; # ifdef CONFIG_PREEMPT_RT unsigned long saved_state_change; # endif #endif struct rcu_head rcu; refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; struct timer_list oom_reaper_timer; #endif #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; #endif #ifdef CONFIG_SECURITY /* Used by LSM modules for access restriction: */ void *security; #endif #ifdef CONFIG_BPF_SYSCALL /* Used by BPF task local storage */ struct bpf_local_storage __rcu *bpf_storage; /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; unsigned long prev_lowest_stack; #endif 
#ifdef CONFIG_X86_MCE
	void __user			*mce_vaddr;
	__u64				mce_kflags;
	u64				mce_addr;
	__u64				mce_ripv : 1,
					mce_whole_page : 1,
					__mce_reserved : 62;
	struct callback_head		mce_kill_me;
	int				mce_count;
#endif

#ifdef CONFIG_KRETPROBES
	struct llist_head		kretprobe_instances;
#endif
#ifdef CONFIG_RETHOOK
	struct llist_head		rethooks;
#endif

#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
	/*
	 * If L1D flush is supported on mm context switch
	 * then we use this callback head to queue kill work
	 * to kill tasks that are not running on SMT disabled
	 * cores
	 */
	struct callback_head		l1d_flush_kill;
#endif

#ifdef CONFIG_RV
	/*
	 * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS.
	 * If we find justification for more monitors, we can think
	 * about adding more or developing a dynamic method. So far,
	 * none of these are justified.
	 */
	union rv_task_monitor		rv[RV_PER_TASK_MONITORS];
#endif

#ifdef CONFIG_USER_EVENTS
	struct user_event_mm		*user_event_mm;
#endif

	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
	 */
	randomized_struct_fields_end

	/* CPU-specific state of this task: */
	struct thread_struct		thread;

	/*
	 * WARNING: on x86, 'thread_struct' contains a variable-sized
	 * structure. It *MUST* be at the end of 'task_struct'.
	 *
	 * Do not put anything below here!
	 */
};

/**
 * task_pid - return the struct pid attached to a task
 * @task: task to query
 *
 * Plain accessor: the pointer is returned as stored, nothing else is done
 * with it here.
 *
 * Return: the value of @task->thread_pid. This is the same field that
 * pid_alive() compares against NULL, so it may be NULL.
 */
static inline struct pid *task_pid(struct task_struct *task)
{
	return task->thread_pid;
}

/*
 * the helpers to get the task's different pids as they are seen
 * from various namespaces
 *
 * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
 * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
 *                     current.
* task_xid_nr_ns() : id seen from the ns specified; * * see also pid_nr() etc in include/linux/pid.h */ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } static inline pid_t task_pid_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); } static inline pid_t task_tgid_nr(struct task_struct *tsk) { return tsk->tgid; } /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. * * Test if a process is not yet dead (at most zombie state) * If pid_alive fails, then pointers within the task structure * can be stale and must not be dereferenced. * * Return: 1 if the process is alive. 0 otherwise. */ static inline int pid_alive(const struct task_struct *p) { return p->thread_pid != NULL; } static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } static inline pid_t task_pgrp_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); } static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } static inline pid_t task_session_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); } static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); } static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; rcu_read_lock(); if (pid_alive(tsk)) pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); 
rcu_read_unlock(); return pid; } static inline pid_t task_ppid_nr(const struct task_struct *tsk) { return task_ppid_nr_ns(tsk, &init_pid_ns); } /* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) static inline unsigned int __task_state_index(unsigned int tsk_state, unsigned int tsk_exit_state) { unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); if ((tsk_state & TASK_IDLE) == TASK_IDLE) state = TASK_REPORT_IDLE; /* * We're lying here, but rather than expose a completely new task state * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. */ if (tsk_state & TASK_RTLOCK_WAIT) state = TASK_UNINTERRUPTIBLE; return fls(state); } static inline unsigned int task_state_index(struct task_struct *tsk) { return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); } static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); return state_char[state]; } static inline char task_state_to_char(struct task_struct *tsk) { return task_index_to_char(task_state_index(tsk)); } /** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. * @tsk: Task structure to be checked. * * Check if a task structure is the first user space task the kernel created. * * Return: 1 if the task structure is init. 0 otherwise. 
*/
static inline int is_global_init(struct task_struct *tsk)
{
	/* Compare the thread-group id (not the pid) so sub-threads of init match too. */
	return task_tgid_nr(tsk) == 1;
}

/* Declared here only; the definition lives outside this header. */
extern struct pid *cad_pid;

/*
 * Per process flags
 *
 * Single-bit masks that are tested against and stored in the task's
 * ->flags word (e.g. is_idle_task() checks p->flags & PF_IDLE).
 *
 * NOTE(review): the PF__HOLE__* names appear to mark bit positions that
 * currently have no flag assigned -- confirm before reusing one.
 */
#define PF_VCPU			0x00000001	/* I'm a virtual CPU */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
#define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
#define PF_MCE_PROCESS		0x00000080      /* Process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Dumped core */
#define PF_SIGNALED		0x00000400	/* Killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Allocating memory */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
#define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF__HOLE__00010000	0x00010000
#define PF_KSWAPD		0x00020000	/* I am kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
#define PF_LOCAL_THROTTLE	0x00100000	/* Throttle writes only against the bdi I write to,
						 * I am cleaning dirty pages from some other bdi.
*/
#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
#define PF__HOLE__00800000	0x00800000
#define PF__HOLE__01000000	0x01000000
#define PF__HOLE__02000000	0x02000000
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
#define PF_MEMALLOC_PIN		0x10000000	/* Allocation context constrained to zones which allow long term pinning. */
#define PF__HOLE__20000000	0x20000000
#define PF__HOLE__40000000	0x40000000
#define PF_SUSPEND_TASK		0x80000000      /* This thread called freeze_processes() and should not be frozen */

/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 */

/*
 * PF_USED_MATH helpers.
 *
 * The *_stopped_child_* forms operate on the ->flags of the task passed in;
 * the short forms apply the same operation to 'current'.
 */
#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()			clear_stopped_child_used_math(current)
#define set_used_math()				set_stopped_child_used_math(current)

/*
 * Clear PF_USED_MATH, then set it again iff @condition is non-zero. Both
 * steps are joined by a comma operator inside one statement, and @child is
 * expanded twice, so it must not have side effects.
 */
#define conditional_stopped_child_used_math(condition, child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)

#define conditional_used_math(condition)	conditional_stopped_child_used_math(condition, current)

/* Make @child's PF_USED_MATH bit equal to current's; @child is expanded twice. */
#define copy_to_stopped_child_used_math(child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
#define used_math()				tsk_used_math(current)

/*
 * With CONFIG_SMP: true when current has PF_NO_SETAFFINITY set and
 * nr_cpus_allowed == 1. Without CONFIG_SMP: always true.
 */
static __always_inline bool is_percpu_thread(void)
{
#ifdef CONFIG_SMP
	return (current->flags & PF_NO_SETAFFINITY) &&
		(current->nr_cpus_allowed == 1);
#else
	return true;
#endif
}

/*
 * Per-process atomic flags.
 *
 * These are bit numbers (not masks) into the task's ->atomic_flags word;
 * the accessors generated below hand them to test_bit()/set_bit()/clear_bit().
 */
#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled */
#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC		7	/* Speculative Store Bypass clear on execve() */

/*
 * Accessor generators. For a given (name, func) pair they emit:
 *   TASK_PFA_TEST  -> bool task_<func>(p)        test_bit(PFA_<name>, ...)
 *   TASK_PFA_SET   -> void task_set_<func>(p)    set_bit(PFA_<name>, ...)
 *   TASK_PFA_CLEAR -> void task_clear_<func>(p)  clear_bit(PFA_<name>, ...)
 * all operating on p->atomic_flags.
 */
#define TASK_PFA_TEST(name, func)					\
	static inline bool task_##func(struct task_struct *p)		\
	{ return test_bit(PFA_##name, &p->atomic_flags); }

#define TASK_PFA_SET(name, func)					\
	static inline void task_set_##func(struct task_struct *p)	\
	{ set_bit(PFA_##name, &p->atomic_flags); }

#define TASK_PFA_CLEAR(name, func)					\
	static inline void task_clear_##func(struct task_struct *p)	\
	{ clear_bit(PFA_##name, &p->atomic_flags); }

/* Only test/set accessors are generated for NO_NEW_PRIVS; no clear accessor here. */
TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { current->flags &= ~flags; current->flags |= orig_flags & flags; } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP /* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); /** * set_cpus_allowed_ptr - set CPU affinity mask of a task * @p: the task * @new_mask: CPU affinity mask * * Return: zero if successful, or a negative error code */ extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } static inline 
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; return 0; } static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { if (src->user_cpus_ptr) return -EINVAL; return 0; } static inline void release_user_cpus_ptr(struct task_struct *p) { WARN_ON(p->user_cpus_ptr); } static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) { return 0; } #endif extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); /** * task_nice - return the nice value of a given task. * @p: the task in question. * * Return: The nice value [ -20 ... 0 ... 19 ]. */ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int available_idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern void sched_set_fifo(struct task_struct *p); extern void sched_set_fifo_low(struct task_struct *p); extern void sched_set_normal(struct task_struct *p, int nice); extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); /** * is_idle_task - is the specified task an idle task? * @p: the task in question. * * Return: 1 if @p is an idle task. 0 otherwise. 
*/ static __always_inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { #ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK struct task_struct task; #endif #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; #ifndef CONFIG_THREAD_INFO_IN_TASK extern struct thread_info init_thread_info; #endif extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK # define task_thread_info(task) (&(task)->thread_info) #elif !defined(__HAVE_THREAD_FUNCTIONS) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif /* * find a task by one of its numerical ids * * find_task_by_pid_ns(): * finds a task by its pid in the specified namespace * find_task_by_vpid(): * finds a task by its virtual pid * * see also find_vpid() etc in include/linux/pid.h */ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); /* * find a task by its virtual pid and get the task struct */ extern struct task_struct *find_get_task_by_vpid(pid_t nr); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); #define get_task_comm(buf, tsk) ({ \ BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ 
__get_task_comm(buf, sizeof(buf), tsk); \ }) #ifdef CONFIG_SMP static __always_inline void scheduler_ipi(void) { /* * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting * TIF_NEED_RESCHED remotely (for the first time) will also send * this IPI. */ preempt_fold_need_resched(); } #else static inline void scheduler_ipi(void) { } #endif extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); /* * Set thread flags in other task's structures. * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { set_ti_thread_flag(task_thread_info(tsk), flag); } static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) { clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, bool value) { update_ti_thread_flag(task_thread_info(tsk), flag, value); } static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_ti_thread_flag(task_thread_info(tsk), flag); } static inline void set_tsk_need_resched(struct task_struct *tsk) { set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } static inline void clear_tsk_need_resched(struct task_struct *tsk) { clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } static inline int test_tsk_need_resched(struct task_struct *tsk) { return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return * value indicates whether a reschedule was done in fact. 
* cond_resched_lock() will drop the spinlock before scheduling.
 */

/*
 * _cond_resched() comes in four flavours, selected at build time:
 *
 *  - PREEMPT_DYNAMIC + HAVE_PREEMPT_DYNAMIC_CALL: goes through the
 *    'cond_resched' static call;
 *  - PREEMPT_DYNAMIC + HAVE_PREEMPT_DYNAMIC_KEY: calls dynamic_cond_resched();
 *  - !PREEMPTION: calls klp_sched_try_switch(), then __cond_resched();
 *  - PREEMPTION without PREEMPT_DYNAMIC: calls klp_sched_try_switch() and
 *    returns 0.
 */
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);

#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

void sched_dynamic_klp_enable(void);
void sched_dynamic_klp_disable(void);

DECLARE_STATIC_CALL(cond_resched, __cond_resched);

static __always_inline int _cond_resched(void)
{
	return static_call_mod(cond_resched)();
}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)

extern int dynamic_cond_resched(void);

static __always_inline int _cond_resched(void)
{
	return dynamic_cond_resched();
}

#else /* !CONFIG_PREEMPTION */

static inline int _cond_resched(void)
{
	klp_sched_try_switch();
	return __cond_resched();
}

#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */

static inline int _cond_resched(void)
{
	klp_sched_try_switch();
	return 0;
}

#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */

/*
 * Statement expression: runs the __might_resched() check for the call site
 * (offsets argument 0) and evaluates to the result of _cond_resched().
 */
#define cond_resched() ({			\
	__might_resched(__FILE__, __LINE__, 0);	\
	_cond_resched();			\
})

extern int __cond_resched_lock(spinlock_t *lock);
extern int __cond_resched_rwlock_read(rwlock_t *lock);
extern int __cond_resched_rwlock_write(rwlock_t *lock);

/*
 * Layout of the 'offsets' value passed to __might_resched() by the macros
 * below: the bits under MIGHT_RESCHED_PREEMPT_MASK (the low
 * MIGHT_RESCHED_RCU_SHIFT bits) and a second field starting at bit
 * MIGHT_RESCHED_RCU_SHIFT.
 * NOTE(review): presumably the expected preempt count and RCU nesting depth
 * respectively -- confirm against __might_resched().
 */
#define MIGHT_RESCHED_RCU_SHIFT		8
#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)

#ifndef CONFIG_PREEMPT_RT
/*
 * Non RT kernels have an elevated preempt count due to the held lock,
 * but are not allowed to be inside a RCU read side critical section
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
#else
/*
 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 * cond_resched*lock() has to take that into account because it checks for
 * preempt_count() and rcu_preempt_depth().
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	\
	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
#endif

/*
 * Lock-holding variants: same shape as cond_resched(), but the
 * __might_resched() check is given PREEMPT_LOCK_RESCHED_OFFSETS and the
 * value is that of the matching __cond_resched_*() function.
 */
#define cond_resched_lock(lock) ({						\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_lock(lock);						\
})

#define cond_resched_rwlock_read(lock) ({					\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_read(lock);					\
})

#define cond_resched_rwlock_write(lock) ({					\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_write(lock);					\
})

/*
 * Leave the RCU read-side section, cond_resched(), and re-enter it.
 * Compiled to an empty body unless CONFIG_DEBUG_ATOMIC_SLEEP is set or
 * CONFIG_PREEMPT_RCU is not set.
 */
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
#endif
}

/*
 * Preemption model queries. With CONFIG_PREEMPT_DYNAMIC they are out-of-line
 * functions; otherwise each one reduces to the build-time value of the
 * corresponding config option.
 */
#ifdef CONFIG_PREEMPT_DYNAMIC

extern bool preempt_model_none(void);
extern bool preempt_model_voluntary(void);
extern bool preempt_model_full(void);

#else

static inline bool preempt_model_none(void)
{
	return IS_ENABLED(CONFIG_PREEMPT_NONE);
}
static inline bool preempt_model_voluntary(void)
{
	return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
}
static inline bool preempt_model_full(void)
{
	return IS_ENABLED(CONFIG_PREEMPT);
}

#endif

/* Build-time value of CONFIG_PREEMPT_RT, regardless of CONFIG_PREEMPT_DYNAMIC. */
static inline bool preempt_model_rt(void)
{
	return IS_ENABLED(CONFIG_PREEMPT_RT);
}

/*
 * Does the preemption model allow non-cooperative preemption?
 *
 * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
 * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
 * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
 * PREEMPT_NONE model.
 */
static inline bool preempt_model_preemptible(void)
{
	return preempt_model_full() || preempt_model_rt();
}

/*
 * Does a critical section need to be broken due to another
 * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
 * but a general need for low latency)
 *
 * Returns spin_is_contended(lock) with CONFIG_PREEMPTION, 0 otherwise.
 */
static inline int spin_needbreak(spinlock_t *lock)
{
#ifdef CONFIG_PREEMPTION
	return spin_is_contended(lock);
#else
	return 0;
#endif
}

/*
 * Check if a rwlock is contended.
* Returns non-zero if there is another task waiting on the rwlock. * Returns zero if the lock is not contended or the system / underlying * rwlock implementation does not support contention detection. * Technically does not depend on CONFIG_PREEMPTION, but a general need * for low latency. */ static inline int rwlock_needbreak(rwlock_t *lock) { #ifdef CONFIG_PREEMPTION return rwlock_is_contended(lock); #else return 0; #endif } static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); } /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ #ifdef CONFIG_SMP static inline unsigned int task_cpu(const struct task_struct *p) { return READ_ONCE(task_thread_info(p)->cpu); } extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else static inline unsigned int task_cpu(const struct task_struct *p) { return 0; } static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) { } #endif /* CONFIG_SMP */ extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. * * This allows us to terminate optimistic spin loops and block, analogous to * the native optimistic spin heuristic of testing if the lock owner task is * running or not. 
*/ #ifndef vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { return false; } #endif extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif #ifdef CONFIG_SMP static inline bool owner_on_cpu(struct task_struct *owner) { /* * As lock holder preemption issue, we both skip spinning if * task is not on cpu or its cpu is preempted */ return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner)); } /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ #ifdef CONFIG_RSEQ /* * Map the event mask on the user-space ABI enum rseq_cs_flags * for direct mask checks. */ enum rseq_event_mask_bits { RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, }; enum rseq_event_mask { RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), }; static inline void rseq_set_notify_resume(struct task_struct *t) { if (t->rseq) set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { if (current->rseq) __rseq_handle_notify_resume(ksig, regs); } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { preempt_disable(); __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); preempt_enable(); rseq_handle_notify_resume(ksig, regs); } /* rseq_preempt() requires preemption to be disabled. 
*/ static inline void rseq_preempt(struct task_struct *t) { __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); rseq_set_notify_resume(t); } /* rseq_migrate() requires preemption to be disabled. */ static inline void rseq_migrate(struct task_struct *t) { __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); rseq_set_notify_resume(t); } /* * If parent process has a registered restartable sequences area, the * child inherits. Unregister rseq for a clone with CLONE_VM set. */ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { if (clone_flags & CLONE_VM) { t->rseq = NULL; t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } else { t->rseq = current->rseq; t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; } } static inline void rseq_execve(struct task_struct *t) { t->rseq = NULL; t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } #else static inline void rseq_set_notify_resume(struct task_struct *t) { } static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_preempt(struct task_struct *t) { } static inline void rseq_migrate(struct task_struct *t) { } static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { } static inline void rseq_execve(struct task_struct *t) { } #endif #ifdef CONFIG_DEBUG_RSEQ void rseq_syscall(struct pt_regs *regs); #else static inline void rseq_syscall(struct pt_regs *regs) { } #endif #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, unsigned long uaddr); extern int sched_core_idle_cpu(int cpu); #else static inline void sched_core_free(struct task_struct *tsk) { } static inline void sched_core_fork(struct 
task_struct *p) { } static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } #endif extern void sched_set_stop_task(int cpu, struct task_struct *stop); #endif |
2 80 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_INTERNAL_H #define __CGROUP_INTERNAL_H #include <linux/cgroup.h> #include <linux/kernfs.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/refcount.h> #include <linux/fs_parser.h> #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; extern void __init enable_debug_cgroup(void); /* * cgroup_path() takes a spin lock. It is good practice not to take * spin locks within trace point handlers, as they are mostly hidden * from normal view. As cgroup_path() can take the kernfs_rename_lock * spin lock, it is best to not call that function from the trace event * handler. * * Note: trace_cgroup_##type##_enabled() is a static branch that will only * be set when the trace event is enabled. */ #define TRACE_CGROUP_PATH(type, cgrp, ...) 
\ do { \ if (trace_cgroup_##type##_enabled()) { \ unsigned long flags; \ spin_lock_irqsave(&trace_cgroup_path_lock, \ flags); \ cgroup_path(cgrp, trace_cgroup_path, \ TRACE_CGROUP_PATH_LEN); \ trace_cgroup_##type(cgrp, trace_cgroup_path, \ ##__VA_ARGS__); \ spin_unlock_irqrestore(&trace_cgroup_path_lock, \ flags); \ } \ } while (0) /* * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { struct kernfs_fs_context kfc; struct cgroup_root *root; struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ bool cpuset_clone_children; bool none; /* User explicitly requested empty subsystem */ bool all_ss; /* Seen 'all' option */ u16 subsys_mask; /* Selected subsystems */ char *name; /* Hierarchy name */ char *release_agent; /* Path for release notifications */ }; static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; return container_of(kfc, struct cgroup_fs_context, kfc); } struct cgroup_pidlist; struct cgroup_file_ctx { struct cgroup_namespace *ns; struct { void *trigger; } psi; struct { bool started; struct css_task_iter iter; } procs; struct { struct cgroup_pidlist *pidlist; } procs1; }; /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other * direction, a css_set is naturally associated with multiple cgroups. * This M:N relationship is represented by the following link structure * which exists for each association and allows traversing the associations * from both sides. 
*/ struct cgrp_cset_link { /* the cgroup and css_set this link associates */ struct cgroup *cgrp; struct css_set *cset; /* list of cgrp_cset_links anchored at cgrp->cset_links */ struct list_head cset_link; /* list of cgrp_cset_links anchored at css_set->cgrp_links */ struct list_head cgrp_link; }; /* used to track tasks and csets during migration */ struct cgroup_taskset { /* the src and dst cset list running through cset->mg_node */ struct list_head src_csets; struct list_head dst_csets; /* the number of tasks in the set */ int nr_tasks; /* the subsys currently being processed */ int ssid; /* * Fields for cgroup_taskset_*() iteration. * * Before migration is committed, the target migration tasks are on * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of * the csets on ->dst_csets. ->csets point to either ->src_csets * or ->dst_csets depending on whether migration is committed. * * ->cur_csets and ->cur_task point to the current task position * during iteration. */ struct list_head *csets; struct css_set *cur_cset; struct task_struct *cur_task; }; /* migration context also tracks preloading */ struct cgroup_mgctx { /* * Preloaded source and destination csets. Used to guarantee * atomic success or failure on actual migration. 
*/ struct list_head preloaded_src_csets; struct list_head preloaded_dst_csets; /* tasks and csets to migrate */ struct cgroup_taskset tset; /* subsystems affected by migration */ u16 ss_mask; }; #define CGROUP_TASKSET_INIT(tset) \ { \ .src_csets = LIST_HEAD_INIT(tset.src_csets), \ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ .csets = &tset.src_csets, \ } #define CGROUP_MGCTX_INIT(name) \ { \ LIST_HEAD_INIT(name.preloaded_src_csets), \ LIST_HEAD_INIT(name.preloaded_dst_csets), \ CGROUP_TASKSET_INIT(name.tset), \ } #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; /* iterate across the hierarchies */ #define for_each_root(root) \ list_for_each_entry((root), &cgroup_roots, root_list) /** * for_each_subsys - iterate all enabled cgroup subsystems * @ss: the iteration cursor * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end */ #define for_each_subsys(ss, ssid) \ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \ (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) static inline bool cgroup_is_dead(const struct cgroup *cgrp) { return !(cgrp->self.flags & CSS_ONLINE); } static inline bool notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } void put_css_set_locked(struct css_set *cset); static inline void put_css_set(struct css_set *cset) { unsigned long flags; /* * Ensure that the refcount doesn't hit zero while any readers * can see it. 
Similar to atomic_dec_and_lock(), but for an * rwlock */ if (refcount_dec_not_one(&cset->refcount)) return; spin_lock_irqsave(&css_set_lock, flags); put_css_set_locked(cset); spin_unlock_irqrestore(&css_set_lock, flags); } /* * refcounted get/put for css_set objects */ static inline void get_css_set(struct css_set *cset) { refcount_inc(&cset->refcount); } bool cgroup_ssid_enabled(int ssid); bool cgroup_on_dfl(const struct cgroup *cgrp); struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root); struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline); void cgroup_kn_unlock(struct kernfs_node *kn); int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_favor_dynmods(struct cgroup_root *root, bool favor); void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); int cgroup_do_get_tree(struct fs_context *fc); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, struct cgroup_mgctx *mgctx); int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx); int cgroup_migrate(struct task_struct *leader, bool threadgroup, struct cgroup_mgctx *mgctx); int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup); void cgroup_attach_lock(bool lock_threadgroup); void cgroup_attach_unlock(bool lock_threadgroup); struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, bool *locked) __acquires(&cgroup_threadgroup_rwsem); void cgroup_procs_write_finish(struct task_struct *task, bool locked) __releases(&cgroup_threadgroup_rwsem); void 
cgroup_lock_and_drain_offline(struct cgroup *cgrp); int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode); int cgroup_rmdir(struct kernfs_node *kn); int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, struct kernfs_root *kf_root); int __cgroup_task_count(const struct cgroup *cgrp); int cgroup_task_count(const struct cgroup *cgrp); /* * rstat.c */ int cgroup_rstat_init(struct cgroup *cgrp); void cgroup_rstat_exit(struct cgroup *cgrp); void cgroup_rstat_boot(void); void cgroup_base_stat_cputime_show(struct seq_file *seq); /* * namespace.c */ extern const struct proc_ns_operations cgroupns_operations; /* * cgroup-v1.c */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; extern const struct fs_parameter_spec cgroup1_fs_parameters[]; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); int cgroup1_get_tree(struct fs_context *fc); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ |
8 7 7 1 1 11 10 21 21 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 */ #include <linux/fs.h> #include <linux/slab.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_unicode.h" #include "jfs_debug.h" /* * NAME: jfs_strfromUCS() * * FUNCTION: Convert little-endian unicode string to character string * */ int jfs_strfromUCS_le(char *to, const __le16 * from, int len, struct nls_table *codepage) { int i; int outlen = 0; static int warn_again = 5; /* Only warn up to 5 times total */ int warn = !!warn_again; /* once per string */ if (codepage) { for (i = 0; (i < len) && from[i]; i++) { int charlen; charlen = codepage->uni2char(le16_to_cpu(from[i]), &to[outlen], NLS_MAX_CHARSET_SIZE); if (charlen > 0) outlen += charlen; else to[outlen++] = '?'; } } else { for (i = 0; (i < len) && from[i]; i++) { if (unlikely(le16_to_cpu(from[i]) & 0xff00)) { to[i] = '?'; if (unlikely(warn)) { warn--; warn_again--; printk(KERN_ERR "non-latin1 character 0x%x found in JFS file name\n", le16_to_cpu(from[i])); printk(KERN_ERR "mount with iocharset=utf8 to access\n"); } } else to[i] = (char) (le16_to_cpu(from[i])); } outlen = i; } to[outlen] = 0; return outlen; } /* * NAME: jfs_strtoUCS() * * FUNCTION: Convert character string to unicode string * */ static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len, struct nls_table *codepage) { int charlen; int i; if (codepage) { for (i = 0; len && *from; i++, from += charlen, len -= charlen) { charlen = codepage->char2uni(from, len, &to[i]); if (charlen < 1) { 
jfs_err("jfs_strtoUCS: char2uni returned %d.", charlen); jfs_err("charset = %s, char = 0x%x", codepage->charset, *from); return charlen; } } } else { for (i = 0; (i < len) && from[i]; i++) to[i] = (wchar_t) from[i]; } to[i] = 0; return i; } /* * NAME: get_UCSname() * * FUNCTION: Allocate and translate to unicode string * */ int get_UCSname(struct component_name * uniName, struct dentry *dentry) { struct nls_table *nls_tab = JFS_SBI(dentry->d_sb)->nls_tab; int length = dentry->d_name.len; if (length > JFS_NAME_MAX) return -ENAMETOOLONG; uniName->name = kmalloc_array(length + 1, sizeof(wchar_t), GFP_NOFS); if (uniName->name == NULL) return -ENOMEM; uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, length, nls_tab); if (uniName->namlen < 0) { kfree(uniName->name); return uniName->namlen; } return 0; } |
12 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PACKET_INTERNAL_H__ #define __PACKET_INTERNAL_H__ #include <linux/refcount.h> struct packet_mclist { struct packet_mclist *next; int ifindex; int count; unsigned short type; unsigned short alen; unsigned char addr[MAX_ADDR_LEN]; }; /* kbdq - kernel block descriptor queue */ struct tpacket_kbdq_core { struct pgv *pkbdq; unsigned int feature_req_word; unsigned int hdrlen; unsigned char reset_pending_on_curr_blk; unsigned char delete_blk_timer; unsigned short kactive_blk_num; unsigned short blk_sizeof_priv; /* last_kactive_blk_num: * trick to see if user-space has caught up * in order to avoid refreshing timer when every single pkt arrives. 
*/ unsigned short last_kactive_blk_num; char *pkblk_start; char *pkblk_end; int kblk_size; unsigned int max_frame_len; unsigned int knum_blocks; uint64_t knxt_seq_num; char *prev; char *nxt_offset; struct sk_buff *skb; rwlock_t blk_fill_in_prog_lock; /* Default is set to 8ms */ #define DEFAULT_PRB_RETIRE_TOV (8) unsigned short retire_blk_tov; unsigned short version; unsigned long tov_in_jiffies; /* timer to retire an outstanding block */ struct timer_list retire_blk_timer; }; struct pgv { char *buffer; }; struct packet_ring_buffer { struct pgv *pg_vec; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; unsigned int frame_max; unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; unsigned int __percpu *pending_refcnt; union { unsigned long *rx_owner_map; struct tpacket_kbdq_core prb_bdqc; }; }; extern struct mutex fanout_mutex; #define PACKET_FANOUT_MAX (1 << 16) struct packet_fanout { possible_net_t net; unsigned int num_members; u32 max_num_members; u16 id; u8 type; u8 flags; union { atomic_t rr_cur; struct bpf_prog __rcu *bpf_prog; }; struct list_head list; spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; struct sock __rcu *arr[] __counted_by(max_num_members); }; struct packet_rollover { int sock; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; #define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32)) u32 history[ROLLOVER_HLEN] ____cacheline_aligned; } ____cacheline_aligned_in_smp; struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; struct packet_fanout *fanout; union tpacket_stats_u stats; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; int ifindex; /* bound device */ u8 vnet_hdr_sz; __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; atomic_t mapped; enum tpacket_versions 
tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; struct completion skb_completion; struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; atomic_t tp_drops ____cacheline_aligned_in_smp; }; #define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, PACKET_SOCK_RUNNING, PACKET_SOCK_PRESSURE, PACKET_SOCK_QDISC_BYPASS, }; static inline void packet_sock_flag_set(struct packet_sock *po, enum packet_sock_flags flag, bool val) { if (val) set_bit(flag, &po->flags); else clear_bit(flag, &po->flags); } static inline bool packet_sock_flag(const struct packet_sock *po, enum packet_sock_flags flag) { return test_bit(flag, &po->flags); } #endif |
6 3 3 5 8 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ECB: Electronic CodeBook mode * * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> static int crypto_ecb_crypt(struct crypto_cipher *cipher, const u8 *src, u8 *dst, unsigned nbytes, bool final, void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) { const unsigned int bsize = crypto_cipher_blocksize(cipher); while (nbytes >= bsize) { fn(crypto_cipher_tfm(cipher), dst, src); src += bsize; dst += bsize; nbytes -= bsize; } return nbytes && final ? 
-EINVAL : nbytes; } static int crypto_ecb_encrypt2(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, bool final) { struct crypto_cipher **ctx = crypto_lskcipher_ctx(tfm); struct crypto_cipher *cipher = *ctx; return crypto_ecb_crypt(cipher, src, dst, len, final, crypto_cipher_alg(cipher)->cia_encrypt); } static int crypto_ecb_decrypt2(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *iv, bool final) { struct crypto_cipher **ctx = crypto_lskcipher_ctx(tfm); struct crypto_cipher *cipher = *ctx; return crypto_ecb_crypt(cipher, src, dst, len, final, crypto_cipher_alg(cipher)->cia_decrypt); } static int lskcipher_setkey_simple2(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen) { struct crypto_cipher **ctx = crypto_lskcipher_ctx(tfm); struct crypto_cipher *cipher = *ctx; crypto_cipher_clear_flags(cipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(cipher, crypto_lskcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); return crypto_cipher_setkey(cipher, key, keylen); } static int lskcipher_init_tfm_simple2(struct crypto_lskcipher *tfm) { struct lskcipher_instance *inst = lskcipher_alg_instance(tfm); struct crypto_cipher **ctx = crypto_lskcipher_ctx(tfm); struct crypto_cipher_spawn *spawn; struct crypto_cipher *cipher; spawn = lskcipher_instance_ctx(inst); cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); *ctx = cipher; return 0; } static void lskcipher_exit_tfm_simple2(struct crypto_lskcipher *tfm) { struct crypto_cipher **ctx = crypto_lskcipher_ctx(tfm); crypto_free_cipher(*ctx); } static void lskcipher_free_instance_simple2(struct lskcipher_instance *inst) { crypto_drop_cipher(lskcipher_instance_ctx(inst)); kfree(inst); } static struct lskcipher_instance *lskcipher_alloc_instance_simple2( struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_cipher_spawn *spawn; struct lskcipher_instance *inst; struct crypto_alg *cipher_alg; u32 mask; int err; err = 
crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_LSKCIPHER, &mask); if (err) return ERR_PTR(err); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return ERR_PTR(-ENOMEM); spawn = lskcipher_instance_ctx(inst); err = crypto_grab_cipher(spawn, lskcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; cipher_alg = crypto_spawn_cipher_alg(spawn); err = crypto_inst_setname(lskcipher_crypto_instance(inst), tmpl->name, cipher_alg); if (err) goto err_free_inst; inst->free = lskcipher_free_instance_simple2; /* Default algorithm properties, can be overridden */ inst->alg.co.base.cra_blocksize = cipher_alg->cra_blocksize; inst->alg.co.base.cra_alignmask = cipher_alg->cra_alignmask; inst->alg.co.base.cra_priority = cipher_alg->cra_priority; inst->alg.co.min_keysize = cipher_alg->cra_cipher.cia_min_keysize; inst->alg.co.max_keysize = cipher_alg->cra_cipher.cia_max_keysize; inst->alg.co.ivsize = cipher_alg->cra_blocksize; /* Use struct crypto_cipher * by default, can be overridden */ inst->alg.co.base.cra_ctxsize = sizeof(struct crypto_cipher *); inst->alg.setkey = lskcipher_setkey_simple2; inst->alg.init = lskcipher_init_tfm_simple2; inst->alg.exit = lskcipher_exit_tfm_simple2; return inst; err_free_inst: lskcipher_free_instance_simple2(inst); return ERR_PTR(err); } static int crypto_ecb_create2(struct crypto_template *tmpl, struct rtattr **tb) { struct lskcipher_instance *inst; int err; inst = lskcipher_alloc_instance_simple2(tmpl, tb); if (IS_ERR(inst)) return PTR_ERR(inst); /* ECB mode doesn't take an IV */ inst->alg.co.ivsize = 0; inst->alg.encrypt = crypto_ecb_encrypt2; inst->alg.decrypt = crypto_ecb_decrypt2; err = lskcipher_register_instance(tmpl, inst); if (err) inst->free(inst); return err; } static int crypto_ecb_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_lskcipher_spawn *spawn; struct lskcipher_alg *cipher_alg; struct lskcipher_instance *inst; int err; inst = 
lskcipher_alloc_instance_simple(tmpl, tb); if (IS_ERR(inst)) { err = crypto_ecb_create2(tmpl, tb); return err; } spawn = lskcipher_instance_ctx(inst); cipher_alg = crypto_lskcipher_spawn_alg(spawn); /* ECB mode doesn't take an IV */ inst->alg.co.ivsize = 0; if (cipher_alg->co.ivsize) return -EINVAL; inst->alg.co.base.cra_ctxsize = cipher_alg->co.base.cra_ctxsize; inst->alg.setkey = cipher_alg->setkey; inst->alg.encrypt = cipher_alg->encrypt; inst->alg.decrypt = cipher_alg->decrypt; inst->alg.init = cipher_alg->init; inst->alg.exit = cipher_alg->exit; err = lskcipher_register_instance(tmpl, inst); if (err) inst->free(inst); return err; } static struct crypto_template crypto_ecb_tmpl = { .name = "ecb", .create = crypto_ecb_create, .module = THIS_MODULE, }; static int __init crypto_ecb_module_init(void) { return crypto_register_template(&crypto_ecb_tmpl); } static void __exit crypto_ecb_module_exit(void) { crypto_unregister_template(&crypto_ecb_tmpl); } subsys_initcall(crypto_ecb_module_init); module_exit(crypto_ecb_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ECB block cipher mode of operation"); MODULE_ALIAS_CRYPTO("ecb"); MODULE_IMPORT_NS(CRYPTO_INTERNAL); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IEEE802154_CORE_H #define __IEEE802154_CORE_H #include <net/cfg802154.h> struct cfg802154_registered_device { const struct cfg802154_ops *ops; struct list_head list; /* wpan_phy index, internal only */ int wpan_phy_idx; /* also protected by devlist_mtx */ int opencount; wait_queue_head_t dev_wait; /* protected by RTNL only */ int num_running_ifaces; /* associated wpan interfaces, protected by rtnl or RCU */ struct list_head wpan_dev_list; int devlist_generation, wpan_dev_id; /* must be last because of the way we do wpan_phy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wpan_phy wpan_phy __aligned(NETDEV_ALIGN); }; static inline struct cfg802154_registered_device * wpan_phy_to_rdev(struct wpan_phy *wpan_phy) { BUG_ON(!wpan_phy); return container_of(wpan_phy, struct cfg802154_registered_device, wpan_phy); } extern struct list_head cfg802154_rdev_list; extern int cfg802154_rdev_list_generation; int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, struct net *net); /* free object */ void cfg802154_dev_free(struct cfg802154_registered_device *rdev); struct cfg802154_registered_device * cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx); struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx); #endif /* __IEEE802154_CORE_H */ |
6 6 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * Common Blowfish algorithm parts shared between the c and assembler * implementations. * * Blowfish Cipher Algorithm, by Bruce Schneier. * http://www.counterpane.com/blowfish.html * * Adapted from Kerneli implementation. 
* * Copyright (c) Herbert Valerio Riedel <hvr@hvrlab.org> * Copyright (c) Kyle McMartin <kyle@debian.org> * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> */ #include <crypto/algapi.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <asm/byteorder.h> #include <linux/types.h> #include <crypto/blowfish.h> static const u32 bf_pbox[16 + 2] = { 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, 0x9216d5d9, 0x8979fb1b, }; static const u32 bf_sbox[256 * 4] = { 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, 0xe3fe501a, 
0xb6794c3b, 0x976ce0bd, 0x04c006ba, 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a, 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 
0xf0255dc1, 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, 0x58ebf2ef, 
0x34c6ffea, 0xfe28ed61, 0xee7c3c73, 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7, 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, 0x6b2395e0, 0x333e92e1, 0x3b240b62, 
0xeebeb922, 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0, 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, 0xd5118e9d, 
0xbf0f7315, 0xd62d1c7e, 0xc700c47b, 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 
0x9b941525, 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6, }; /* * Round loop unrolling macros, S is a pointer to a S-Box array * organized in 4 unsigned longs at a row. */ #define GET32_3(x) (((x) & 0xff)) #define GET32_2(x) (((x) >> (8)) & (0xff)) #define GET32_1(x) (((x) >> (16)) & (0xff)) #define GET32_0(x) (((x) >> (24)) & (0xff)) #define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^ \ S[512 + GET32_2(x)]) + S[768 + GET32_3(x)]) #define ROUND(a, b, n) ({ b ^= P[n]; a ^= bf_F(b); }) /* * The blowfish encipher, processes 64-bit blocks. 
* NOTE: This function MUSTN'T respect endianess */ static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src) { const u32 *P = bctx->p; const u32 *S = bctx->s; u32 yl = src[0]; u32 yr = src[1]; ROUND(yr, yl, 0); ROUND(yl, yr, 1); ROUND(yr, yl, 2); ROUND(yl, yr, 3); ROUND(yr, yl, 4); ROUND(yl, yr, 5); ROUND(yr, yl, 6); ROUND(yl, yr, 7); ROUND(yr, yl, 8); ROUND(yl, yr, 9); ROUND(yr, yl, 10); ROUND(yl, yr, 11); ROUND(yr, yl, 12); ROUND(yl, yr, 13); ROUND(yr, yl, 14); ROUND(yl, yr, 15); yl ^= P[16]; yr ^= P[17]; dst[0] = yr; dst[1] = yl; } /* * Calculates the blowfish S and P boxes for encryption and decryption. */ int blowfish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct bf_ctx *ctx = crypto_tfm_ctx(tfm); u32 *P = ctx->p; u32 *S = ctx->s; short i, j, count; u32 data[2], temp; /* Copy the initialization s-boxes */ for (i = 0, count = 0; i < 256; i++) for (j = 0; j < 4; j++, count++) S[count] = bf_sbox[count]; /* Set the p-boxes */ for (i = 0; i < 16 + 2; i++) P[i] = bf_pbox[i]; /* Actual subkey generation */ for (j = 0, i = 0; i < 16 + 2; i++) { temp = (((u32)key[j] << 24) | ((u32)key[(j + 1) % keylen] << 16) | ((u32)key[(j + 2) % keylen] << 8) | ((u32)key[(j + 3) % keylen])); P[i] = P[i] ^ temp; j = (j + 4) % keylen; } data[0] = 0x00000000; data[1] = 0x00000000; for (i = 0; i < 16 + 2; i += 2) { encrypt_block((struct bf_ctx *)ctx, data, data); P[i] = data[0]; P[i + 1] = data[1]; } for (i = 0; i < 4; i++) { for (j = 0, count = i * 256; j < 256; j += 2, count += 2) { encrypt_block((struct bf_ctx *)ctx, data, data); S[count] = data[0]; S[count + 1] = data[1]; } } /* Bruce says not to bother with the weak key check. */ return 0; } EXPORT_SYMBOL_GPL(blowfish_setkey); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher common functions"); |
6 585 6 6 583 520 520 521 521 6 1 6 521 521 234 234 1 1 1 517 516 233 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 | #include <linux/atomic.h> #include <linux/export.h> #include <linux/generic-radix-tree.h> #include <linux/gfp.h> #include <linux/kmemleak.h> #define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *)) #define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) struct genradix_node { union { /* Interior node: */ struct genradix_node *children[GENRADIX_ARY]; /* Leaf: */ u8 data[PAGE_SIZE]; }; }; static inline int genradix_depth_shift(unsigned depth) { return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth; } /* * Returns size (of data, in bytes) that a tree of a given depth holds: */ static inline size_t genradix_depth_size(unsigned depth) { return 1UL << genradix_depth_shift(depth); } /* depth that's needed for a genradix that can address up to ULONG_MAX: */ #define GENRADIX_MAX_DEPTH \ 
DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT) #define GENRADIX_DEPTH_MASK \ ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) static inline unsigned genradix_root_to_depth(struct genradix_root *r) { return (unsigned long) r & GENRADIX_DEPTH_MASK; } static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) { return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); } /* * Returns pointer to the specified byte @offset within @radix, or NULL if not * allocated */ void *__genradix_ptr(struct __genradix *radix, size_t offset) { struct genradix_root *r = READ_ONCE(radix->root); struct genradix_node *n = genradix_root_to_node(r); unsigned level = genradix_root_to_depth(r); if (ilog2(offset) >= genradix_depth_shift(level)) return NULL; while (1) { if (!n) return NULL; if (!level) break; level--; n = n->children[offset >> genradix_depth_shift(level)]; offset &= genradix_depth_size(level) - 1; } return &n->data[offset]; } EXPORT_SYMBOL(__genradix_ptr); static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) { struct genradix_node *node; node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO); /* * We're using pages (not slab allocations) directly for kernel data * structures, so we need to explicitly inform kmemleak of them in order * to avoid false positive memory leak reports. 
*/ kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask); return node; } static inline void genradix_free_node(struct genradix_node *node) { kmemleak_free(node); free_page((unsigned long)node); } /* * Returns pointer to the specified byte @offset within @radix, allocating it if * necessary - newly allocated slots are always zeroed out: */ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, gfp_t gfp_mask) { struct genradix_root *v = READ_ONCE(radix->root); struct genradix_node *n, *new_node = NULL; unsigned level; /* Increase tree depth if necessary: */ while (1) { struct genradix_root *r = v, *new_root; n = genradix_root_to_node(r); level = genradix_root_to_depth(r); if (n && ilog2(offset) < genradix_depth_shift(level)) break; if (!new_node) { new_node = genradix_alloc_node(gfp_mask); if (!new_node) return NULL; } new_node->children[0] = n; new_root = ((struct genradix_root *) ((unsigned long) new_node | (n ? level + 1 : 0))); if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; } } while (level--) { struct genradix_node **p = &n->children[offset >> genradix_depth_shift(level)]; offset &= genradix_depth_size(level) - 1; n = READ_ONCE(*p); if (!n) { if (!new_node) { new_node = genradix_alloc_node(gfp_mask); if (!new_node) return NULL; } if (!(n = cmpxchg_release(p, NULL, new_node))) swap(n, new_node); } } if (new_node) genradix_free_node(new_node); return &n->data[offset]; } EXPORT_SYMBOL(__genradix_ptr_alloc); void *__genradix_iter_peek(struct genradix_iter *iter, struct __genradix *radix, size_t objs_per_page) { struct genradix_root *r; struct genradix_node *n; unsigned level, i; if (iter->offset == SIZE_MAX) return NULL; restart: r = READ_ONCE(radix->root); if (!r) return NULL; n = genradix_root_to_node(r); level = genradix_root_to_depth(r); if (ilog2(iter->offset) >= genradix_depth_shift(level)) return NULL; while (level) { level--; i = (iter->offset >> genradix_depth_shift(level)) & (GENRADIX_ARY - 1); while 
(!n->children[i]) { size_t objs_per_ptr = genradix_depth_size(level); if (iter->offset + objs_per_ptr < iter->offset) { iter->offset = SIZE_MAX; iter->pos = SIZE_MAX; return NULL; } i++; iter->offset = round_down(iter->offset + objs_per_ptr, objs_per_ptr); iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; if (i == GENRADIX_ARY) goto restart; } n = n->children[i]; } return &n->data[iter->offset & (PAGE_SIZE - 1)]; } EXPORT_SYMBOL(__genradix_iter_peek); void *__genradix_iter_peek_prev(struct genradix_iter *iter, struct __genradix *radix, size_t objs_per_page, size_t obj_size_plus_page_remainder) { struct genradix_root *r; struct genradix_node *n; unsigned level, i; if (iter->offset == SIZE_MAX) return NULL; restart: r = READ_ONCE(radix->root); if (!r) return NULL; n = genradix_root_to_node(r); level = genradix_root_to_depth(r); if (ilog2(iter->offset) >= genradix_depth_shift(level)) { iter->offset = genradix_depth_size(level); iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; iter->offset -= obj_size_plus_page_remainder; iter->pos--; } while (level) { level--; i = (iter->offset >> genradix_depth_shift(level)) & (GENRADIX_ARY - 1); while (!n->children[i]) { size_t objs_per_ptr = genradix_depth_size(level); iter->offset = round_down(iter->offset, objs_per_ptr); iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; if (!iter->offset) return NULL; iter->offset -= obj_size_plus_page_remainder; iter->pos--; if (!i) goto restart; --i; } n = n->children[i]; } return &n->data[iter->offset & (PAGE_SIZE - 1)]; } EXPORT_SYMBOL(__genradix_iter_peek_prev); static void genradix_free_recurse(struct genradix_node *n, unsigned level) { if (level) { unsigned i; for (i = 0; i < GENRADIX_ARY; i++) if (n->children[i]) genradix_free_recurse(n->children[i], level - 1); } genradix_free_node(n); } int __genradix_prealloc(struct __genradix *radix, size_t size, gfp_t gfp_mask) { size_t offset; for (offset = 0; offset < size; offset += PAGE_SIZE) if 
(!__genradix_ptr_alloc(radix, offset, gfp_mask)) return -ENOMEM; return 0; } EXPORT_SYMBOL(__genradix_prealloc); void __genradix_free(struct __genradix *radix) { struct genradix_root *r = xchg(&radix->root, NULL); genradix_free_recurse(genradix_root_to_node(r), genradix_root_to_depth(r)); } EXPORT_SYMBOL(__genradix_free); |
11 1765 337 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 | /* 
SPDX-License-Identifier: GPL-2.0-or-later */ /* Authentication token and access key management * * Copyright (C) 2004, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See Documentation/security/keys/core.rst for information on keys/keyrings. */ #ifndef _LINUX_KEY_H #define _LINUX_KEY_H #include <linux/types.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> #include <linux/sysctl.h> #include <linux/rwsem.h> #include <linux/atomic.h> #include <linux/assoc_array.h> #include <linux/refcount.h> #include <linux/time64.h> #ifdef __KERNEL__ #include <linux/uidgid.h> /* key handle serial number */ typedef int32_t key_serial_t; /* key handle permissions mask */ typedef uint32_t key_perm_t; struct key; struct net; #ifdef CONFIG_KEYS #undef KEY_DEBUGGING #define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */ #define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */ #define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */ #define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */ #define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */ #define KEY_POS_SETATTR 0x20000000 /* possessor can set key attributes */ #define KEY_POS_ALL 0x3f000000 #define KEY_USR_VIEW 0x00010000 /* user permissions... */ #define KEY_USR_READ 0x00020000 #define KEY_USR_WRITE 0x00040000 #define KEY_USR_SEARCH 0x00080000 #define KEY_USR_LINK 0x00100000 #define KEY_USR_SETATTR 0x00200000 #define KEY_USR_ALL 0x003f0000 #define KEY_GRP_VIEW 0x00000100 /* group permissions... */ #define KEY_GRP_READ 0x00000200 #define KEY_GRP_WRITE 0x00000400 #define KEY_GRP_SEARCH 0x00000800 #define KEY_GRP_LINK 0x00001000 #define KEY_GRP_SETATTR 0x00002000 #define KEY_GRP_ALL 0x00003f00 #define KEY_OTH_VIEW 0x00000001 /* third party permissions... 
*/ #define KEY_OTH_READ 0x00000002 #define KEY_OTH_WRITE 0x00000004 #define KEY_OTH_SEARCH 0x00000008 #define KEY_OTH_LINK 0x00000010 #define KEY_OTH_SETATTR 0x00000020 #define KEY_OTH_ALL 0x0000003f #define KEY_PERM_UNDEF 0xffffffff /* * The permissions required on a key that we're looking up. */ enum key_need_perm { KEY_NEED_UNSPECIFIED, /* Needed permission unspecified */ KEY_NEED_VIEW, /* Require permission to view attributes */ KEY_NEED_READ, /* Require permission to read content */ KEY_NEED_WRITE, /* Require permission to update / modify */ KEY_NEED_SEARCH, /* Require permission to search (keyring) or find (key) */ KEY_NEED_LINK, /* Require permission to link */ KEY_NEED_SETATTR, /* Require permission to change attributes */ KEY_NEED_UNLINK, /* Require permission to unlink key */ KEY_SYSADMIN_OVERRIDE, /* Special: override by CAP_SYS_ADMIN */ KEY_AUTHTOKEN_OVERRIDE, /* Special: override by possession of auth token */ KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ }; enum key_lookup_flag { KEY_LOOKUP_CREATE = 0x01, KEY_LOOKUP_PARTIAL = 0x02, KEY_LOOKUP_ALL = (KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL), }; struct seq_file; struct user_struct; struct signal_struct; struct cred; struct key_type; struct key_owner; struct key_tag; struct keyring_list; struct keyring_name; struct key_tag { struct rcu_head rcu; refcount_t usage; bool removed; /* T when subject removed */ }; struct keyring_index_key { /* [!] If this structure is altered, the union in struct key must change too! 
*/ unsigned long hash; /* Hash value */ union { struct { #ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */ u16 desc_len; char desc[sizeof(long) - 2]; /* First few chars of description */ #else char desc[sizeof(long) - 2]; /* First few chars of description */ u16 desc_len; #endif }; unsigned long x; }; struct key_type *type; struct key_tag *domain_tag; /* Domain of operation */ const char *description; }; union key_payload { void __rcu *rcu_data0; void *data[4]; }; /*****************************************************************************/ /* * key reference with possession attribute handling * * NOTE! key_ref_t is a typedef'd pointer to a type that is not actually * defined. This is because we abuse the bottom bit of the reference to carry a * flag to indicate whether the calling process possesses that key in one of * its keyrings. * * the key_ref_t has been made a separate type so that the compiler can reject * attempts to dereference it without proper conversion. * * the three functions are used to assemble and disassemble references */ typedef struct __key_reference_with_attributes *key_ref_t; static inline key_ref_t make_key_ref(const struct key *key, bool possession) { return (key_ref_t) ((unsigned long) key | possession); } static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) { return (struct key *) ((unsigned long) key_ref & ~1UL); } static inline bool is_key_possessed(const key_ref_t key_ref) { return (unsigned long) key_ref & 1UL; } typedef int (*key_restrict_link_func_t)(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *restriction_key); struct key_restriction { key_restrict_link_func_t check; struct key *key; struct key_type *keytype; }; enum key_state { KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE, /* Positively instantiated */ }; /*****************************************************************************/ /* * authentication token / access credential / keyring * - types of key 
include: * - keyrings * - disk encryption IDs * - Kerberos TGTs and tickets */ struct key { refcount_t usage; /* number of references */ key_serial_t serial; /* key serial number */ union { struct list_head graveyard_link; struct rb_node serial_node; }; #ifdef CONFIG_KEY_NOTIFICATIONS struct watch_list *watchers; /* Entities watching this key for changes */ #endif struct rw_semaphore sem; /* change vs change sem */ struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ union { time64_t expiry; /* time at which key expires (or 0) */ time64_t revoked_at; /* time at which key was revoked */ }; time64_t last_used_at; /* last time used for LRU keyring discard */ kuid_t uid; kgid_t gid; key_perm_t perm; /* access permissions */ unsigned short quotalen; /* length added to quota */ unsigned short datalen; /* payload data length * - may not match RCU dereferenced payload * - payload should contain own length */ short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; #define KEY_DEBUG_MAGIC 0x18273645u #endif unsigned long flags; /* status flags (change with bitops) */ #define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ #define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ #define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ #define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ #define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ #define KEY_FLAG_BUILTIN 6 /* set if key is built in to the kernel */ #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria 
* - it should be a printable string * - eg: for krb5 AFS, this might be "afs@REDHAT.COM" */ union { struct keyring_index_key index_key; struct { unsigned long hash; unsigned long len_desc; struct key_type *type; /* type of key */ struct key_tag *domain_tag; /* Domain of operation */ char *description; }; }; /* key data * - this is used to hold the data actually used in cryptography or * whatever */ union { union key_payload payload; struct { /* Keyring bits */ struct list_head name_link; struct assoc_array keys; }; }; /* This is set on a keyring to restrict the addition of a link to a key * to it. If this structure isn't provided then it is assumed that the * keyring is open to any addition. It is ignored for non-keyring * keys. Only set this value using keyring_restrict(), keyring_alloc(), * or key_alloc(). * * This is intended for use with rings of trusted keys whereby addition * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION * overrides this, allowing the kernel to add extra keys without * restriction. 
*/ struct key_restriction *restrict_link; }; extern struct key *key_alloc(struct key_type *type, const char *desc, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, struct key_restriction *restrict_link); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ #define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ #define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ #define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ #define KEY_ALLOC_SET_KEEP 0x0020 /* Set the KEEP flag on the key/keyring */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); extern void key_put(struct key *key); extern bool key_put_tag(struct key_tag *tag); extern void key_remove_domain(struct key_tag *domain_tag); static inline struct key *__key_get(struct key *key) { refcount_inc(&key->usage); return key; } static inline struct key *key_get(struct key *key) { return key ? __key_get(key) : key; } static inline void key_ref_put(key_ref_t key_ref) { key_put(key_ref_to_ptr(key_ref)); } extern struct key *request_key_tag(struct key_type *type, const char *description, struct key_tag *domain_tag, const char *callout_info); extern struct key *request_key_rcu(struct key_type *type, const char *description, struct key_tag *domain_tag); extern struct key *request_key_with_auxdata(struct key_type *type, const char *description, struct key_tag *domain_tag, const void *callout_info, size_t callout_len, void *aux); /** * request_key - Request a key and wait for construction * @type: Type of key. * @description: The searchable description of the key. * @callout_info: The data to pass to the instantiation upcall (or NULL). * * As for request_key_tag(), but with the default global domain tag. 
*/ static inline struct key *request_key(struct key_type *type, const char *description, const char *callout_info) { return request_key_tag(type, description, NULL, callout_info); } #ifdef CONFIG_NET /** * request_key_net - Request a key for a net namespace and wait for construction * @type: Type of key. * @description: The searchable description of the key. * @net: The network namespace that is the key's domain of operation. * @callout_info: The data to pass to the instantiation upcall (or NULL). * * As for request_key() except that it does not add the returned key to a * keyring if found, new keys are always allocated in the user's quota, the * callout_info must be a NUL-terminated string and no auxiliary data can be * passed. Only keys that operate the specified network namespace are used. * * Furthermore, it then works as wait_for_key_construction() to wait for the * completion of keys undergoing construction with a non-interruptible wait. */ #define request_key_net(type, description, net, callout_info) \ request_key_tag(type, description, net->key_domain, callout_info) /** * request_key_net_rcu - Request a key for a net namespace under RCU conditions * @type: Type of key. * @description: The searchable description of the key. * @net: The network namespace that is the key's domain of operation. * * As for request_key_rcu() except that only keys that operate the specified * network namespace are used. 
*/ #define request_key_net_rcu(type, description, net) \ request_key_rcu(type, description, net->key_domain) #endif /* CONFIG_NET */ extern int wait_for_key_construction(struct key *key, bool intr); extern int key_validate(const struct key *key); extern key_ref_t key_create(key_ref_t keyring, const char *type, const char *description, const void *payload, size_t plen, key_perm_t perm, unsigned long flags); extern key_ref_t key_create_or_update(key_ref_t keyring, const char *type, const char *description, const void *payload, size_t plen, key_perm_t perm, unsigned long flags); extern int key_update(key_ref_t key, const void *payload, size_t plen); extern int key_link(struct key *keyring, struct key *key); extern int key_move(struct key *key, struct key *from_keyring, struct key *to_keyring, unsigned int flags); extern int key_unlink(struct key *keyring, struct key *key); extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, struct key_restriction *restrict_link, struct key *dest); extern int restrict_link_reject(struct key *keyring, const struct key_type *type, const union key_payload *payload, struct key *restriction_key); extern int keyring_clear(struct key *keyring); extern key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, const char *description, bool recurse); extern int keyring_add_key(struct key *keyring, struct key *key); extern int keyring_restrict(key_ref_t keyring, const char *type, const char *restriction); extern struct key *key_lookup(key_serial_t id); static inline key_serial_t key_serial(const struct key *key) { return key ? 
key->serial : 0; } extern void key_set_timeout(struct key *, unsigned); extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, enum key_need_perm need_perm); extern void key_free_user_ns(struct user_namespace *); static inline short key_read_state(const struct key *key) { /* Barrier versus mark_key_instantiated(). */ return smp_load_acquire(&key->state); } /** * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. */ static inline bool key_is_positive(const struct key *key) { return key_read_state(key) == KEY_IS_POSITIVE; } static inline bool key_is_negative(const struct key *key) { return key_read_state(key) < 0; } #define dereference_key_rcu(KEY) \ (rcu_dereference((KEY)->payload.rcu_data0)) #define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ do { \ rcu_assign_pointer((KEY)->payload.rcu_data0, (PAYLOAD)); \ } while (0) /* * the userspace interface */ extern int install_thread_keyring_to_cred(struct cred *cred); extern void key_fsuid_changed(struct cred *new_cred); extern void key_fsgid_changed(struct cred *new_cred); extern void key_init(void); #else /* CONFIG_KEYS */ #define key_validate(k) 0 #define key_serial(k) 0 #define key_get(k) ({ NULL; }) #define key_revoke(k) do { } while(0) #define key_invalidate(k) do { } while(0) #define key_put(k) do { } while(0) #define key_ref_put(k) do { } while(0) #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 #define key_fsuid_changed(c) do { } while(0) #define key_fsgid_changed(c) do { } while(0) #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) #define key_lookup(k) NULL #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ #endif /* 
_LINUX_KEY_H */ |
61 73 81 87 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM migrate #if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MIGRATE_H #include <linux/tracepoint.h> #define MIGRATE_MODE \ EM( MIGRATE_ASYNC, "MIGRATE_ASYNC") \ EM( MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT") \ EMe(MIGRATE_SYNC, "MIGRATE_SYNC") #define MIGRATE_REASON \ EM( MR_COMPACTION, "compaction") \ EM( MR_MEMORY_FAILURE, "memory_failure") \ EM( MR_MEMORY_HOTPLUG, "memory_hotplug") \ EM( MR_SYSCALL, "syscall_or_cpuset") \ EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ EM( MR_NUMA_MISPLACED, "numa_misplaced") \ EM( MR_CONTIG_RANGE, "contig_range") \ EM( MR_LONGTERM_PIN, "longterm_pin") \ EMe(MR_DEMOTION, "demotion") /* * First define the enums in the above macros to be exported to userspace * via TRACE_DEFINE_ENUM(). */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); MIGRATE_MODE MIGRATE_REASON /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. 
*/ #undef EM #undef EMe #define EM(a, b) {a, b}, #define EMe(a, b) {a, b} TRACE_EVENT(mm_migrate_pages, TP_PROTO(unsigned long succeeded, unsigned long failed, unsigned long thp_succeeded, unsigned long thp_failed, unsigned long thp_split, unsigned long large_folio_split, enum migrate_mode mode, int reason), TP_ARGS(succeeded, failed, thp_succeeded, thp_failed, thp_split, large_folio_split, mode, reason), TP_STRUCT__entry( __field( unsigned long, succeeded) __field( unsigned long, failed) __field( unsigned long, thp_succeeded) __field( unsigned long, thp_failed) __field( unsigned long, thp_split) __field( unsigned long, large_folio_split) __field( enum migrate_mode, mode) __field( int, reason) ), TP_fast_assign( __entry->succeeded = succeeded; __entry->failed = failed; __entry->thp_succeeded = thp_succeeded; __entry->thp_failed = thp_failed; __entry->thp_split = thp_split; __entry->large_folio_split = large_folio_split; __entry->mode = mode; __entry->reason = reason; ), TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu nr_split=%lu mode=%s reason=%s", __entry->succeeded, __entry->failed, __entry->thp_succeeded, __entry->thp_failed, __entry->thp_split, __entry->large_folio_split, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); TRACE_EVENT(mm_migrate_pages_start, TP_PROTO(enum migrate_mode mode, int reason), TP_ARGS(mode, reason), TP_STRUCT__entry( __field(enum migrate_mode, mode) __field(int, reason) ), TP_fast_assign( __entry->mode = mode; __entry->reason = reason; ), TP_printk("mode=%s reason=%s", __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); DECLARE_EVENT_CLASS(migration_pte, TP_PROTO(unsigned long addr, unsigned long pte, int order), TP_ARGS(addr, pte, order), TP_STRUCT__entry( __field(unsigned long, addr) __field(unsigned long, pte) __field(int, order) ), TP_fast_assign( __entry->addr = addr; __entry->pte = 
pte; __entry->order = order; ), TP_printk("addr=%lx, pte=%lx order=%d", __entry->addr, __entry->pte, __entry->order) ); DEFINE_EVENT(migration_pte, set_migration_pte, TP_PROTO(unsigned long addr, unsigned long pte, int order), TP_ARGS(addr, pte, order) ); DEFINE_EVENT(migration_pte, remove_migration_pte, TP_PROTO(unsigned long addr, unsigned long pte, int order), TP_ARGS(addr, pte, order) ); #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
75 75 4 73 3 71 73 74 41 54 1 53 53 1 45 11 44 97 95 3 38 3 44 43 1 37 1 52 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Forwarding decision * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/err.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/netpoll.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> #include "br_private.h" /* Don't forward packets to originating port or forwarding disabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { struct net_bridge_vlan_group *vg; vg = 
nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && eth_type_vlan(skb->protocol)) { int depth; if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth)) goto drop; skb_set_network_header(skb, depth); } br_switchdev_frame_set_offload_fwd_mark(skb); dev_queue_xmit(skb); return 0; drop: kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_clear_tstamp(skb); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); } EXPORT_SYMBOL_GPL(br_forward_finish); static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_orig) { struct net_bridge_vlan_group *vg; struct net_device *indev; struct net *net; int br_hook; /* Mark the skb for forwarding offload early so that br_handle_vlan() * can know whether to pop the VLAN header on egress or keep it. 
*/ nbp_switchdev_frame_mark_tx_fwd_offload(to, skb); vg = nbp_vlan_group_rcu(to); skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) return; indev = skb->dev; skb->dev = to->dev; if (!local_orig) { if (skb_warn_if_lro(skb)) { kfree_skb(skb); return; } br_hook = NF_BR_FORWARD; skb_forward_csum(skb); net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); else br_netpoll_send_skb(to, skb); return; } br_hook = NF_BR_LOCAL_OUT; net = dev_net(skb->dev); indev = NULL; } NF_HOOK(NFPROTO_BRIDGE, br_hook, net, NULL, skb, indev, skb->dev, br_forward_finish); } static int deliver_clone(const struct net_bridge_port *prev, struct sk_buff *skb, bool local_orig) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } __br_forward(prev, skb, local_orig); return 0; } /** * br_forward - forward a packet to a specific port * @to: destination port * @skb: packet being forwarded * @local_rcv: packet will be received locally after forwarding * @local_orig: packet is locally originated * * Should be called with rcu_read_lock. 
*/ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig) { if (unlikely(!to)) goto out; /* redirect to backup link if the destination port is down */ if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) { struct net_bridge_port *backup_port; backup_port = rcu_dereference(to->backup_port); if (unlikely(!backup_port)) goto out; BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid); to = backup_port; } if (should_deliver(to, skb)) { if (local_rcv) deliver_clone(to, skb, local_orig); else __br_forward(to, skb, local_orig); return; } out: if (!local_rcv) kfree_skb(skb); } EXPORT_SYMBOL_GPL(br_forward); static struct net_bridge_port *maybe_deliver( struct net_bridge_port *prev, struct net_bridge_port *p, struct sk_buff *skb, bool local_orig) { u8 igmp_type = br_multicast_igmp_type(skb); int err; if (!should_deliver(p, skb)) return prev; nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb); if (!prev) goto out; err = deliver_clone(prev, skb, local_orig); if (err) return ERR_PTR(err); out: br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX); return p; } /* called under rcu_read_lock */ void br_flood(struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig, u16 vid) { struct net_bridge_port *prev = NULL; struct net_bridge_port *p; br_tc_skb_miss_set(skb, pkt_type != BR_PKT_BROADCAST); list_for_each_entry_rcu(p, &br->port_list, list) { /* Do not flood unicast traffic to ports that turn it off, nor * other traffic if flood off, except for traffic we originate */ switch (pkt_type) { case BR_PKT_UNICAST: if (!(p->flags & BR_FLOOD)) continue; break; case BR_PKT_MULTICAST: if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; break; case BR_PKT_BROADCAST: if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) continue; break; } /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; if 
(BR_INPUT_SKB_CB(skb)->proxyarp_replied && ((p->flags & BR_PROXYARP_WIFI) || br_is_neigh_suppress_enabled(p, vid))) continue; prev = maybe_deliver(prev, p, skb, local_orig); if (IS_ERR(prev)) goto out; } if (!prev) goto out; if (local_rcv) deliver_clone(prev, skb, local_orig); else __br_forward(prev, skb, local_orig); return; out: if (!local_rcv) kfree_skb(skb); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, const unsigned char *addr, bool local_orig) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; if (!should_deliver(p, skb)) return; /* Even with hairpin, no soliloquies - prevent breaking IPv6 DAD */ if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; skb = skb_copy(skb, GFP_ATOMIC); if (!skb) { DEV_STATS_INC(dev, tx_dropped); return; } if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); __br_forward(p, skb, local_orig); } /* called with rcu_read_lock */ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; bool allow_mode_include = true; struct hlist_node *rp; rp = br_multicast_get_first_rport_node(brmctx, skb); if (mdst) { p = rcu_dereference(mdst->ports); if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) && br_multicast_is_star_g(&mdst->addr)) allow_mode_include = false; } else { p = NULL; br_tc_skb_miss_set(skb, true); } while (p || rp) { struct net_bridge_port *port, *lport, *rport; lport = p ? 
p->key.port : NULL; rport = br_multicast_rport_from_node_skb(rp, skb); if ((unsigned long)lport > (unsigned long)rport) { port = lport; if (port->flags & BR_MULTICAST_TO_UNICAST) { maybe_deliver_addr(lport, skb, p->eth_addr, local_orig); goto delivered; } if ((!allow_mode_include && p->filter_mode == MCAST_INCLUDE) || (p->flags & MDB_PG_FLAGS_BLOCKED)) goto delivered; } else { port = rport; } prev = maybe_deliver(prev, port, skb, local_orig); if (IS_ERR(prev)) goto out; delivered: if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) rp = rcu_dereference(hlist_next_rcu(rp)); } if (!prev) goto out; if (local_rcv) deliver_clone(prev, skb, local_orig); else __br_forward(prev, skb, local_orig); return; out: if (!local_rcv) kfree_skb(skb); } #endif |
286 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 | // SPDX-License-Identifier: GPL-2.0-or-later /* Copyright 2020 NXP */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/slab.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_gate.h> #include <net/tc_wrapper.h> static struct tc_action_ops act_gate_ops; static ktime_t gate_get_time(struct tcf_gate *gact) { ktime_t mono = ktime_get(); switch (gact->tk_offset) { case TK_OFFS_MAX: return mono; default: return ktime_mono_to_any(mono, gact->tk_offset); } return KTIME_MAX; } static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) { struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; u64 n; base = ns_to_ktime(param->tcfg_basetime); now = gate_get_time(gact); if (ktime_after(base, now)) { *start = base; return; } cycle = param->tcfg_cycletime; n = div64_u64(ktime_sub_ns(now, base), cycle); *start = ktime_add_ns(base, (n + 1) * cycle); } static void gate_start_timer(struct tcf_gate *gact, ktime_t start) { ktime_t expires; expires = hrtimer_get_expires(&gact->hitimer); if (expires == 0) expires = KTIME_MAX; start = min_t(ktime_t, start, expires); hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT); 
} static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) { struct tcf_gate *gact = container_of(timer, struct tcf_gate, hitimer); struct tcf_gate_params *p = &gact->param; struct tcfg_gate_entry *next; ktime_t close_time, now; spin_lock(&gact->tcf_lock); next = gact->next_entry; /* cycle start, clear pending bit, clear total octets */ gact->current_gate_status = next->gate_state ? GATE_ACT_GATE_OPEN : 0; gact->current_entry_octets = 0; gact->current_max_octets = next->maxoctets; gact->current_close_time = ktime_add_ns(gact->current_close_time, next->interval); close_time = gact->current_close_time; if (list_is_last(&next->list, &p->entries)) next = list_first_entry(&p->entries, struct tcfg_gate_entry, list); else next = list_next_entry(next, list); now = gate_get_time(gact); if (ktime_after(now, close_time)) { ktime_t cycle, base; u64 n; cycle = p->tcfg_cycletime; base = ns_to_ktime(p->tcfg_basetime); n = div64_u64(ktime_sub_ns(now, base), cycle); close_time = ktime_add_ns(base, (n + 1) * cycle); } gact->next_entry = next; hrtimer_set_expires(&gact->hitimer, close_time); spin_unlock(&gact->tcf_lock); return HRTIMER_RESTART; } TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_gate *gact = to_gate(a); int action = READ_ONCE(gact->tcf_action); tcf_lastuse_update(&gact->tcf_tm); tcf_action_update_bstats(&gact->common, skb); spin_lock(&gact->tcf_lock); if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) { spin_unlock(&gact->tcf_lock); return action; } if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) { spin_unlock(&gact->tcf_lock); goto drop; } if (gact->current_max_octets >= 0) { gact->current_entry_octets += qdisc_pkt_len(skb); if (gact->current_entry_octets > gact->current_max_octets) { spin_unlock(&gact->tcf_lock); goto overlimit; } } spin_unlock(&gact->tcf_lock); return action; overlimit: tcf_action_inc_overlimit_qstats(&gact->common); drop: 
tcf_action_inc_drop_qstats(&gact->common); return TC_ACT_SHOT; } static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = { [TCA_GATE_ENTRY_INDEX] = { .type = NLA_U32 }, [TCA_GATE_ENTRY_GATE] = { .type = NLA_FLAG }, [TCA_GATE_ENTRY_INTERVAL] = { .type = NLA_U32 }, [TCA_GATE_ENTRY_IPV] = { .type = NLA_S32 }, [TCA_GATE_ENTRY_MAX_OCTETS] = { .type = NLA_S32 }, }; static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = { [TCA_GATE_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_gate)), [TCA_GATE_PRIORITY] = { .type = NLA_S32 }, [TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED }, [TCA_GATE_BASE_TIME] = { .type = NLA_U64 }, [TCA_GATE_CYCLE_TIME] = { .type = NLA_U64 }, [TCA_GATE_CYCLE_TIME_EXT] = { .type = NLA_U64 }, [TCA_GATE_FLAGS] = { .type = NLA_U32 }, [TCA_GATE_CLOCKID] = { .type = NLA_S32 }, }; static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry, struct netlink_ext_ack *extack) { u32 interval = 0; entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]); if (tb[TCA_GATE_ENTRY_INTERVAL]) interval = nla_get_u32(tb[TCA_GATE_ENTRY_INTERVAL]); if (interval == 0) { NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); return -EINVAL; } entry->interval = interval; if (tb[TCA_GATE_ENTRY_IPV]) entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]); else entry->ipv = -1; if (tb[TCA_GATE_ENTRY_MAX_OCTETS]) entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]); else entry->maxoctets = -1; return 0; } static int parse_gate_entry(struct nlattr *n, struct tcfg_gate_entry *entry, int index, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_GATE_ENTRY_MAX + 1] = { }; int err; err = nla_parse_nested(tb, TCA_GATE_ENTRY_MAX, n, entry_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Could not parse nested entry"); return -EINVAL; } entry->index = index; return fill_gate_entry(tb, entry, extack); } static void release_entry_list(struct list_head *entries) { struct tcfg_gate_entry *entry, *e; list_for_each_entry_safe(entry, 
e, entries, list) { list_del(&entry->list); kfree(entry); } } static int parse_gate_list(struct nlattr *list_attr, struct tcf_gate_params *sched, struct netlink_ext_ack *extack) { struct tcfg_gate_entry *entry; struct nlattr *n; int err, rem; int i = 0; if (!list_attr) return -EINVAL; nla_for_each_nested(n, list_attr, rem) { if (nla_type(n) != TCA_GATE_ONE_ENTRY) { NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'"); continue; } entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { NL_SET_ERR_MSG(extack, "Not enough memory for entry"); err = -ENOMEM; goto release_list; } err = parse_gate_entry(n, entry, i, extack); if (err < 0) { kfree(entry); goto release_list; } list_add_tail(&entry->list, &sched->entries); i++; } sched->num_entries = i; return i; release_list: release_entry_list(&sched->entries); return err; } static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, enum tk_offsets tko, s32 clockid, bool do_init) { if (!do_init) { if (basetime == gact->param.tcfg_basetime && tko == gact->tk_offset && clockid == gact->param.tcfg_clockid) return; spin_unlock_bh(&gact->tcf_lock); hrtimer_cancel(&gact->hitimer); spin_lock_bh(&gact->tcf_lock); } gact->param.tcfg_basetime = basetime; gact->param.tcfg_clockid = clockid; gact->tk_offset = tko; hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); gact->hitimer.function = gate_timer_func; } static int tcf_gate_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; u64 cycletime = 0, basetime = 0; struct tcf_gate_params *p; s32 clockid = CLOCK_TAI; struct tcf_gate *gact; struct tc_gate *parm; int ret = 0, err; u32 gflags = 0; s32 prio = -1; ktime_t start; u32 index; if (!nla) return 
-EINVAL; err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack); if (err < 0) return err; if (!tb[TCA_GATE_PARMS]) return -EINVAL; if (tb[TCA_GATE_CLOCKID]) { clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); switch (clockid) { case CLOCK_REALTIME: tk_offset = TK_OFFS_REAL; break; case CLOCK_MONOTONIC: tk_offset = TK_OFFS_MAX; break; case CLOCK_BOOTTIME: tk_offset = TK_OFFS_BOOT; break; case CLOCK_TAI: tk_offset = TK_OFFS_TAI; break; default: NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); return -EINVAL; } } parm = nla_data(tb[TCA_GATE_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; if (err && bind) return 0; if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_gate_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); if (tb[TCA_GATE_BASE_TIME]) basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]); if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); gact = to_gate(*a); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&gact->param.entries); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; spin_lock_bh(&gact->tcf_lock); p = &gact->param; if (tb[TCA_GATE_CYCLE_TIME]) cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); if (tb[TCA_GATE_ENTRY_LIST]) { err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); if (err < 0) goto chain_put; } if (!cycletime) { struct tcfg_gate_entry *entry; ktime_t cycle = 0; list_for_each_entry(entry, &p->entries, list) cycle = ktime_add_ns(cycle, entry->interval); cycletime = cycle; if (!cycletime) { err = -EINVAL; goto chain_put; } } p->tcfg_cycletime = cycletime; if (tb[TCA_GATE_CYCLE_TIME_EXT]) p->tcfg_cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); gate_setup_timer(gact, basetime, tk_offset, clockid, ret 
== ACT_P_CREATED); p->tcfg_priority = prio; p->tcfg_flags = gflags; gate_get_start_time(gact, &start); gact->current_close_time = start; gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; gact->next_entry = list_first_entry(&p->entries, struct tcfg_gate_entry, list); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); gate_start_timer(gact, start); spin_unlock_bh(&gact->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; chain_put: spin_unlock_bh(&gact->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: /* action is not inserted in any list: it's safe to init hitimer * without taking tcf_lock. */ if (ret == ACT_P_CREATED) gate_setup_timer(gact, gact->param.tcfg_basetime, gact->tk_offset, gact->param.tcfg_clockid, true); tcf_idr_release(*a, bind); return err; } static void tcf_gate_cleanup(struct tc_action *a) { struct tcf_gate *gact = to_gate(a); struct tcf_gate_params *p; p = &gact->param; hrtimer_cancel(&gact->hitimer); release_entry_list(&p->entries); } static int dumping_entry(struct sk_buff *skb, struct tcfg_gate_entry *entry) { struct nlattr *item; item = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY); if (!item) return -ENOSPC; if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index)) goto nla_put_failure; if (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval)) goto nla_put_failure; if (nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets)) goto nla_put_failure; if (nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv)) goto nla_put_failure; return nla_nest_end(skb, item); nla_put_failure: nla_nest_cancel(skb, item); return -1; } static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_gate *gact = to_gate(a); struct tc_gate opt = { .index = gact->tcf_index, .refcnt = refcount_read(&gact->tcf_refcnt) - ref, .bindcnt = 
atomic_read(&gact->tcf_bindcnt) - bind, }; struct tcfg_gate_entry *entry; struct tcf_gate_params *p; struct nlattr *entry_list; struct tcf_t t; spin_lock_bh(&gact->tcf_lock); opt.action = gact->tcf_action; p = &gact->param; if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME, p->tcfg_basetime, TCA_GATE_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME, p->tcfg_cycletime, TCA_GATE_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT, p->tcfg_cycletime_ext, TCA_GATE_PAD)) goto nla_put_failure; if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags)) goto nla_put_failure; if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority)) goto nla_put_failure; entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST); if (!entry_list) goto nla_put_failure; list_for_each_entry(entry, &p->entries, list) { if (dumping_entry(skb, entry) < 0) goto nla_put_failure; } nla_nest_end(skb, entry_list); tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD)) goto nla_put_failure; spin_unlock_bh(&gact->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&gact->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_gate *gact = to_gate(a); struct tcf_t *tm = &gact->tcf_tm; tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static size_t tcf_gate_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_gate)); } static void tcf_gate_entry_destructor(void *priv) { struct action_gate_entry *oe = priv; kfree(oe); } static int tcf_gate_get_entries(struct flow_action_entry *entry, const struct tc_action *act) { entry->gate.entries = tcf_gate_get_list(act); if 
(!entry->gate.entries) return -EINVAL; entry->destructor = tcf_gate_entry_destructor; entry->destructor_priv = entry->gate.entries; return 0; } static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { int err; if (bind) { struct flow_action_entry *entry = entry_data; entry->id = FLOW_ACTION_GATE; entry->gate.prio = tcf_gate_prio(act); entry->gate.basetime = tcf_gate_basetime(act); entry->gate.cycletime = tcf_gate_cycletime(act); entry->gate.cycletimeext = tcf_gate_cycletimeext(act); entry->gate.num_entries = tcf_gate_num_entries(act); err = tcf_gate_get_entries(entry, act); if (err) return err; *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; fl_action->id = FLOW_ACTION_GATE; } return 0; } static struct tc_action_ops act_gate_ops = { .kind = "gate", .id = TCA_ID_GATE, .owner = THIS_MODULE, .act = tcf_gate_act, .dump = tcf_gate_dump, .init = tcf_gate_init, .cleanup = tcf_gate_cleanup, .stats_update = tcf_gate_stats_update, .get_fill_size = tcf_gate_get_fill_size, .offload_act_setup = tcf_gate_offload_act_setup, .size = sizeof(struct tcf_gate), }; static __net_init int gate_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); return tc_action_net_init(net, tn, &act_gate_ops); } static void __net_exit gate_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_gate_ops.net_id); } static struct pernet_operations gate_net_ops = { .init = gate_init_net, .exit_batch = gate_exit_net, .id = &act_gate_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init gate_init_module(void) { return tcf_register_action(&act_gate_ops, &gate_net_ops); } static void __exit gate_cleanup_module(void) { tcf_unregister_action(&act_gate_ops, &gate_net_ops); } module_init(gate_init_module); module_exit(gate_cleanup_module); MODULE_DESCRIPTION("TC gate action"); MODULE_LICENSE("GPL v2"); |
4 4 4 3 4 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | // SPDX-License-Identifier: GPL-2.0 /* * Parts of this file are * Copyright (C) 2022-2023 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/cfg80211.h> #include "nl80211.h" #include "core.h" #include "rdev-ops.h" /* * Stop AP operation on a single link. Returns -EOPNOTSUPP when the driver * has no stop_ap op or the interface type is neither AP nor P2P_GO, and * -ENOENT when the link's beacon_interval is zero; otherwise returns the * result of rdev_stop_ap(). On success the connection owner port id, the * link's beacon interval and chandef, and the SSID length are cleared, the * QoS map is reset with NULL, the ap-stopped notification is sent if @notify * is set, and cfg80211_sched_dfs_chan_update() is called. Once the early * checks have passed, cfg80211_disconnect_work is scheduled whether or not * the stop succeeded. */ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, unsigned int link_id, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; lockdep_assert_wiphy(wdev->wiphy); if (!rdev->ops->stop_ap) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!wdev->links[link_id].ap.beacon_interval) return -ENOENT; err = rdev_stop_ap(rdev, dev, link_id); if (!err) { wdev->conn_owner_nlportid = 0; wdev->links[link_id].ap.beacon_interval = 0; memset(&wdev->links[link_id].ap.chandef, 0, sizeof(wdev->links[link_id].ap.chandef)); wdev->u.ap.ssid_len = 0; rdev_set_qos_map(rdev, dev, NULL); if (notify) nl80211_send_ap_stopped(wdev, link_id); /* Should we apply the grace period during beaconing interface * shutdown also? */ cfg80211_sched_dfs_chan_update(rdev); } schedule_work(&cfg80211_disconnect_work); return err; } /* * Stop AP operation on one link or on all of them. A non-negative @link_id * stops only that link and returns its result. A negative @link_id walks * for_each_valid_link(), attempts every link even after a failure, and * returns the last non-zero error seen (0 if none failed). */ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, int link_id, bool notify) { unsigned int link; int ret = 0; if (link_id >= 0) return ___cfg80211_stop_ap(rdev, dev, link_id, notify); for_each_valid_link(dev->ieee80211_ptr, link) { int ret1 = ___cfg80211_stop_ap(rdev, dev, link, notify); if (ret1) ret = ret1; /* try the next one also if one errored */ } return ret; } |
4 4 4 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 | /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ #include <linux/kallsyms.h> #include <linux/kprobes.h> 
#include <linux/uaccess.h> #include <linux/utsname.h> #include <linux/hardirq.h> #include <linux/kdebug.h> #include <linux/module.h> #include <linux/ptrace.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/ftrace.h> #include <linux/kexec.h> #include <linux/bug.h> #include <linux/nmi.h> #include <linux/sysfs.h> #include <linux/kasan.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> #include <asm/unwind.h> int panic_on_unrecovered_nmi; int panic_on_io_nmi; static int die_counter; static struct pt_regs exec_summary_regs; bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info) { unsigned long *begin = task_stack_page(task); unsigned long *end = task_stack_page(task) + THREAD_SIZE; if (stack < begin || stack >= end) return false; info->type = STACK_TYPE_TASK; info->begin = begin; info->end = end; info->next_sp = NULL; return true; } /* Called from get_stack_info_noinstr - so must be noinstr too */ bool noinstr in_entry_stack(unsigned long *stack, struct stack_info *info) { struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); void *begin = ss; void *end = ss + 1; if ((void *)stack < begin || (void *)stack >= end) return false; info->type = STACK_TYPE_ENTRY; info->begin = begin; info->end = end; info->next_sp = NULL; return true; } static void printk_stack_address(unsigned long address, int reliable, const char *log_lvl) { touch_nmi_watchdog(); printk("%s %s%pBb\n", log_lvl, reliable ? "" : "? ", (void *)address); } static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, unsigned int nbytes) { if (!user_mode(regs)) return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); /* The user space code from other tasks cannot be accessed. 
*/ if (regs != task_pt_regs(current)) return -EPERM; /* * Even if named copy_from_user_nmi() this can be invoked from * other contexts and will not try to resolve a pagefault, which is * the correct thing to do here as this code can be called from any * context. */ return copy_from_user_nmi(buf, (void __user *)src, nbytes); } /* * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus: * * In case where we don't have the exact kernel image (which, if we did, we can * simply disassemble and navigate to the RIP), the purpose of the bigger * prologue is to have more context and to be able to correlate the code from * the different toolchains better. * * In addition, it helps in recreating the register allocation of the failing * kernel and thus make sense of the register dump. * * What is more, the additional complication of a variable length insn arch like * x86 warrants having longer byte sequence before rIP so that the disassembler * can "sync" up properly and find instruction boundaries when decoding the * opcode bytes. * * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random * guesstimate in attempt to achieve all of the above. */ void show_opcodes(struct pt_regs *regs, const char *loglvl) { #define PROLOGUE_SIZE 42 #define EPILOGUE_SIZE 21 #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { case 0: printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes, opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1); break; case -EPERM: /* No access to the user space stack of other tasks. Ignore. 
*/ break; default: printk("%sCode: Unable to access opcode bytes at 0x%lx.\n", loglvl, prologue); break; } } void show_ip(struct pt_regs *regs, const char *loglvl) { #ifdef CONFIG_X86_32 printk("%sEIP: %pS\n", loglvl, (void *)regs->ip); #else printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip); #endif show_opcodes(regs, loglvl); } void show_iret_regs(struct pt_regs *regs, const char *log_lvl) { show_ip(regs, log_lvl); printk("%sRSP: %04x:%016lx EFLAGS: %08lx", log_lvl, (int)regs->ss, regs->sp, regs->flags); } static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, bool partial, const char *log_lvl) { /* * These on_stack() checks aren't strictly necessary: the unwind code * has already validated the 'regs' pointer. The checks are done for * ordering reasons: if the registers are on the next stack, we don't * want to print them out yet. Otherwise they'll be shown as part of * the wrong stack. Later, when show_trace_log_lvl() switches to the * next stack, this function will be called again with the same regs so * they can be printed in the right context. */ if (!partial && on_stack(info, regs, sizeof(*regs))) { __show_regs(regs, SHOW_REGS_SHORT, log_lvl); } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, IRET_FRAME_SIZE)) { /* * When an interrupt or exception occurs in entry code, the * full pt_regs might not have been saved yet. In that case * just print the iret frame. */ show_iret_regs(regs, log_lvl); } } /* * This function reads pointers from the stack and dereferences them. The * pointers may not have their KMSAN shadow set up properly, which may result * in false positive reports. Disable instrumentation to avoid those. 
*/ __no_kmsan_checks static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, const char *log_lvl) { struct unwind_state state; struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; bool partial = false; printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); regs = unwind_get_entry_regs(&state, &partial); /* * Iterate through the stacks, starting with the current stack pointer. * Each stack has a pointer to the next one. * * x86-64 can have several stacks: * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) * - entry stack * * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack * - entry stack */ for (stack = stack ?: get_stack_pointer(task, regs); stack; stack = stack_info.next_sp) { const char *stack_name; stack = PTR_ALIGN(stack, sizeof(long)); if (get_stack_info(stack, task, &stack_info, &visit_mask)) { /* * We weren't on a valid stack. It's possible that * we overflowed a valid stack into a guard page. * See if the next page up is valid so that we can * generate some kind of backtrace if this happens. */ stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); if (get_stack_info(stack, task, &stack_info, &visit_mask)) break; } stack_name = stack_type_name(stack_info.type); if (stack_name) printk("%s <%s>\n", log_lvl, stack_name); if (regs) show_regs_if_on_stack(&stack_info, regs, partial, log_lvl); /* * Scan the stack, printing any text addresses we find. At the * same time, follow proper stack frames with the unwinder. * * Addresses found during the scan which are not reported by * the unwinder are considered to be additional clues which are * sometimes useful for debugging and are prefixed with '?'. * This also serves as a failsafe option in case the unwinder * goes off in the weeds. 
*/ for (; stack < stack_info.end; stack++) { unsigned long real_addr; int reliable = 0; unsigned long addr = READ_ONCE_NOCHECK(*stack); unsigned long *ret_addr_p = unwind_get_return_address_ptr(&state); if (!__kernel_text_address(addr)) continue; /* * Don't print regs->ip again if it was already printed * by show_regs_if_on_stack(). */ if (regs && stack == &regs->ip) goto next; if (stack == ret_addr_p) reliable = 1; /* * When function graph tracing is enabled for a * function, its return address on the stack is * replaced with the address of an ftrace handler * (return_to_handler). In that case, before printing * the "real" address, we want to print the handler * address as an "unreliable" hint that function graph * tracing was involved. */ real_addr = ftrace_graph_ret_addr(task, &graph_idx, addr, stack); if (real_addr != addr) printk_stack_address(addr, 0, log_lvl); printk_stack_address(real_addr, reliable, log_lvl); if (!reliable) continue; next: /* * Get the next frame from the unwinder. No need to * check for an error: if anything goes wrong, the rest * of the addresses will just be printed as unreliable. */ unwind_next_frame(&state); /* if the frame has entry regs, print them */ regs = unwind_get_entry_regs(&state, &partial); if (regs) show_regs_if_on_stack(&stack_info, regs, partial, log_lvl); } if (stack_name) printk("%s </%s>\n", log_lvl, stack_name); } } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { task = task ? : current; /* * Stack frames below this one aren't interesting. Don't show them * if we're printing for %current. 
*/ if (!sp && task == current) sp = get_stack_pointer(current, NULL); show_trace_log_lvl(task, NULL, sp, loglvl); } void show_stack_regs(struct pt_regs *regs) { show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); } static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; unsigned long oops_begin(void) { int cpu; unsigned long flags; oops_enter(); /* racy, but better than risking deadlock. */ raw_local_irq_save(flags); cpu = smp_processor_id(); if (!arch_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else arch_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; console_verbose(); bust_spinlocks(1); return flags; } NOKPROBE_SYMBOL(oops_begin); void __noreturn rewind_stack_and_make_dead(int signr); void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); die_owner = -1; add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; if (!die_nest_count) /* Nest count reaches zero, release the lock. */ arch_spin_unlock(&die_lock); raw_local_irq_restore(flags); oops_exit(); /* Executive summary in case the oops scrolled away */ __show_regs(&exec_summary_regs, SHOW_REGS_ALL, KERN_DEFAULT); if (!signr) return; if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); /* * We're not going to return, but we might be on an IST stack or * have very little stack space left. Rewind the stack and kill * the task. * Before we rewind the stack, we have to tell KASAN that we're going to * reuse the task stack and that existing poisons are invalid. */ kasan_unpoison_task_stack(current); rewind_stack_and_make_dead(signr); } NOKPROBE_SYMBOL(oops_end); static void __die_header(const char *str, struct pt_regs *regs, long err) { const char *pr = ""; /* Save the regs of the first oops for the executive summary later. 
*/ if (!die_counter) exec_summary_regs = *regs; if (IS_ENABLED(CONFIG_PREEMPTION)) pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; printk(KERN_DEFAULT "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, pr, IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); } NOKPROBE_SYMBOL(__die_header); static int __die_body(const char *str, struct pt_regs *regs, long err) { show_regs(regs); print_modules(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; retu |