Total coverage: 307596 (17%) of 1836583
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2013 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_metadir.h"
#include "xfs_metafile.h"

int
xfs_calc_dquots_per_chunk(
	unsigned int		nbblks)	/* basic block units */
{
	ASSERT(nbblks > 0);
	return BBTOB(nbblks) / sizeof(struct xfs_dqblk);
}

/*
 * Do some primitive error checking on ondisk dquot data structures.
 *
 * The xfs_dqblk structure /contains/ the xfs_disk_dquot structure;
 * we verify them separately because at some points we have only the
 * smaller xfs_disk_dquot structure available.
 */
xfs_failaddr_t
xfs_dquot_verify(
	struct xfs_mount	*mp,
	struct xfs_disk_dquot	*ddq,
	xfs_dqid_t		id)	/* used only during quotacheck */
{
	__u8			ddq_type;

	/*
	 * We can encounter an uninitialized dquot buffer for 2 reasons:
	 * 1. If we crash while deleting the quotainode(s), and those blks
	 *    got used for user data. This is because we take the path of
	 *    regular file deletion; however, the size field of quotainodes
	 *    is never updated, so all the tricks that we play in
	 *    itruncate_finish don't quite matter.
	 *
	 * 2. We don't play the quota buffers when there's a quotaoff
	 *    logitem. But the allocation will be replayed so we'll end up
	 *    with an uninitialized quota block.
	 *
	 * This is all fine; things are still consistent, and we haven't lost
	 * any quota information. Just don't complain about bad dquot blks.
	 */
	if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC))
		return __this_address;
	if (ddq->d_version != XFS_DQUOT_VERSION)
		return __this_address;

	if (ddq->d_type & ~XFS_DQTYPE_ANY)
		return __this_address;
	ddq_type = ddq->d_type & XFS_DQTYPE_REC_MASK;
	if (ddq_type != XFS_DQTYPE_USER &&
	    ddq_type != XFS_DQTYPE_PROJ &&
	    ddq_type != XFS_DQTYPE_GROUP)
		return __this_address;

	if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !xfs_has_bigtime(mp))
		return __this_address;

	if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id)
		return __this_address;

	if (id != -1 && id != be32_to_cpu(ddq->d_id))
		return __this_address;

	if (!ddq->d_id)
		return NULL;

	if (ddq->d_blk_softlimit &&
	    be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) &&
	    !ddq->d_btimer)
		return __this_address;

	if (ddq->d_ino_softlimit &&
	    be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) &&
	    !ddq->d_itimer)
		return __this_address;

	if (ddq->d_rtb_softlimit &&
	    be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) &&
	    !ddq->d_rtbtimer)
		return __this_address;

	return NULL;
}
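/*
 * Illustrative sketch (not part of the file above), distilling the
 * limit/timer rule xfs_dquot_verify() applies to each resource pair
 * (blocks, inodes, rt blocks): once usage exceeds a configured soft
 * limit, the grace-period timer must have been started, so "over the
 * soft limit with no timer" is flagged as corruption. A plain
 * host-endian model of one such check:
 */
static bool dquot_timer_rule_ok(unsigned long long softlimit,
				unsigned long long count,
				unsigned int timer)
{
	/* No soft limit configured: nothing to enforce. */
	if (!softlimit)
		return true;
	/* Over the soft limit: the grace timer must be ticking. */
	if (count > softlimit && !timer)
		return false;
	return true;
}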
xfs_failaddr_t
xfs_dqblk_verify(
	struct xfs_mount	*mp,
	struct xfs_dqblk	*dqb,
	xfs_dqid_t		id)	/* used only during quotacheck */
{
	if (xfs_has_crc(mp) &&
	    !uuid_equal(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;

	return xfs_dquot_verify(mp, &dqb->dd_diskdq, id);
}

/*
 * Do some primitive error checking on ondisk dquot data structures.
 */
void
xfs_dqblk_repair(
	struct xfs_mount	*mp,
	struct xfs_dqblk	*dqb,
	xfs_dqid_t		id,
	xfs_dqtype_t		type)
{
	/*
	 * Typically, a repair is only requested by quotacheck.
	 */
	ASSERT(id != -1);
	memset(dqb, 0, sizeof(struct xfs_dqblk));

	dqb->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
	dqb->dd_diskdq.d_version = XFS_DQUOT_VERSION;
	dqb->dd_diskdq.d_type = type;
	dqb->dd_diskdq.d_id = cpu_to_be32(id);

	if (xfs_has_crc(mp)) {
		uuid_copy(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid);
		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
				 XFS_DQUOT_CRC_OFF);
	}
}

STATIC bool
xfs_dquot_buf_verify_crc(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	bool			readahead)
{
	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
	int			ndquots;
	int			i;

	if (!xfs_has_crc(mp))
		return true;

	/*
	 * if we are in log recovery, the quota subsystem has not been
	 * initialised so we have no quotainfo structure. In that case, we
	 * need to manually calculate the number of dquots in the buffer.
	 */
	if (mp->m_quotainfo)
		ndquots = mp->m_quotainfo->qi_dqperchunk;
	else
		ndquots = xfs_calc_dquots_per_chunk(bp->b_length);

	for (i = 0; i < ndquots; i++, d++) {
		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
				      XFS_DQUOT_CRC_OFF)) {
			if (!readahead)
				xfs_buf_verifier_error(bp, -EFSBADCRC,
					__func__, d, sizeof(*d),
					__this_address);
			return false;
		}
	}
	return true;
}

STATIC xfs_failaddr_t
xfs_dquot_buf_verify(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	bool			readahead)
{
	struct xfs_dqblk	*dqb = bp->b_addr;
	xfs_failaddr_t		fa;
	xfs_dqid_t		id = 0;
	int			ndquots;
	int			i;

	/*
	 * if we are in log recovery, the quota subsystem has not been
	 * initialised so we have no quotainfo structure. In that case, we
	 * need to manually calculate the number of dquots in the buffer.
	 */
	if (mp->m_quotainfo)
		ndquots = mp->m_quotainfo->qi_dqperchunk;
	else
		ndquots = xfs_calc_dquots_per_chunk(bp->b_length);

	/*
	 * On the first read of the buffer, verify that each dquot is valid.
	 * We don't know what the id of the dquot is supposed to be, just
	 * that they should be increasing monotonically within the buffer.
	 * If the first id is corrupt, then it will fail on the second dquot
	 * in the buffer so corruptions could point to the wrong dquot in
	 * this case.
	 */
	for (i = 0; i < ndquots; i++) {
		struct xfs_disk_dquot	*ddq;

		ddq = &dqb[i].dd_diskdq;

		if (i == 0)
			id = be32_to_cpu(ddq->d_id);

		fa = xfs_dqblk_verify(mp, &dqb[i], id + i);
		if (fa) {
			if (!readahead)
				xfs_buf_verifier_error(bp, -EFSCORRUPTED,
					__func__, &dqb[i],
					sizeof(struct xfs_dqblk), fa);
			return fa;
		}
	}

	return NULL;
}

static xfs_failaddr_t
xfs_dquot_buf_verify_struct(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;

	return xfs_dquot_buf_verify(mp, bp, false);
}

static void
xfs_dquot_buf_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;

	if (!xfs_dquot_buf_verify_crc(mp, bp, false))
		return;
	xfs_dquot_buf_verify(mp, bp, false);
}

/*
 * readahead errors are silent and simply leave the buffer as !done so a real
 * read will then be run with the xfs_dquot_buf_ops verifier. See
 * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
 * reporting the failure.
 */
static void
xfs_dquot_buf_readahead_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;

	if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
	    xfs_dquot_buf_verify(mp, bp, true) != NULL) {
		xfs_buf_ioerror(bp, -EIO);
		bp->b_flags &= ~XBF_DONE;
	}
}

/*
 * we don't calculate the CRC here as that is done when the dquot is flushed
 * to the buffer after the update is done. This ensures that the dquot in the
 * buffer always has an up-to-date CRC value.
 */
static void
xfs_dquot_buf_write_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;

	xfs_dquot_buf_verify(mp, bp, false);
}

const struct xfs_buf_ops xfs_dquot_buf_ops = {
	.name = "xfs_dquot",
	.magic16 = { cpu_to_be16(XFS_DQUOT_MAGIC),
		     cpu_to_be16(XFS_DQUOT_MAGIC) },
	.verify_read = xfs_dquot_buf_read_verify,
	.verify_write = xfs_dquot_buf_write_verify,
	.verify_struct = xfs_dquot_buf_verify_struct,
};

const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
	.name = "xfs_dquot_ra",
	.magic16 = { cpu_to_be16(XFS_DQUOT_MAGIC),
		     cpu_to_be16(XFS_DQUOT_MAGIC) },
	.verify_read = xfs_dquot_buf_readahead_verify,
	.verify_write = xfs_dquot_buf_write_verify,
};

/* Convert an on-disk timer value into an incore timer value. */
time64_t
xfs_dquot_from_disk_ts(
	struct xfs_disk_dquot	*ddq,
	__be32			dtimer)
{
	uint32_t		t = be32_to_cpu(dtimer);

	if (t != 0 && (ddq->d_type & XFS_DQTYPE_BIGTIME))
		return xfs_dq_bigtime_to_unix(t);

	return t;
}

/* Convert an incore timer value into an on-disk timer value. */
__be32
xfs_dquot_to_disk_ts(
	struct xfs_dquot	*dqp,
	time64_t		timer)
{
	uint32_t		t = timer;

	if (timer != 0 && (dqp->q_type & XFS_DQTYPE_BIGTIME))
		t = xfs_dq_unix_to_bigtime(timer);

	return cpu_to_be32(t);
}

inline unsigned int
xfs_dqinode_sick_mask(xfs_dqtype_t type)
{
	switch (type) {
	case XFS_DQTYPE_USER:
		return XFS_SICK_FS_UQUOTA;
	case XFS_DQTYPE_GROUP:
		return XFS_SICK_FS_GQUOTA;
	case XFS_DQTYPE_PROJ:
		return XFS_SICK_FS_PQUOTA;
	}

	ASSERT(0);
	return 0;
}

/*
 * Load the inode for a given type of quota, assuming that the sb fields have
 * been sorted out. This is not true when switching quota types on a V4
 * filesystem, so do not use this function for that. If metadir is enabled,
 * @dp must be the /quota metadir.
 *
 * Returns -ENOENT if the quota inode field is NULLFSINO; 0 and an inode on
 * success; or a negative errno.
 */
int
xfs_dqinode_load(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dqtype_t		type,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_inode	*ip;
	enum xfs_metafile_type	metafile_type = xfs_dqinode_metafile_type(type);
	int			error;

	if (!xfs_has_metadir(mp)) {
		xfs_ino_t	ino;

		switch (type) {
		case XFS_DQTYPE_USER:
			ino = mp->m_sb.sb_uquotino;
			break;
		case XFS_DQTYPE_GROUP:
			ino = mp->m_sb.sb_gquotino;
			break;
		case XFS_DQTYPE_PROJ:
			ino = mp->m_sb.sb_pquotino;
			break;
		default:
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		/* Should have set 0 to NULLFSINO when loading superblock */
		if (ino == NULLFSINO)
			return -ENOENT;

		error = xfs_trans_metafile_iget(tp, ino, metafile_type, &ip);
	} else {
		error = xfs_metadir_load(tp, dp, xfs_dqinode_path(type),
				metafile_type, &ip);
		if (error == -ENOENT)
			return error;
	}
	if (error) {
		if (xfs_metadata_is_sick(error))
			xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
		return error;
	}

	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
		xfs_irele(ip);
		xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
		return -EFSCORRUPTED;
	}

	if (XFS_IS_CORRUPT(mp, ip->i_projid != 0)) {
		xfs_irele(ip);
		xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
		return -EFSCORRUPTED;
	}

	*ipp = ip;
	return 0;
}

/* Create a metadata directory quota inode. */
int
xfs_dqinode_metadir_create(
	struct xfs_inode	*dp,
	xfs_dqtype_t		type,
	struct xfs_inode	**ipp)
{
	struct xfs_metadir_update	upd = {
		.dp			= dp,
		.metafile_type		= xfs_dqinode_metafile_type(type),
		.path			= xfs_dqinode_path(type),
	};
	int			error;

	error = xfs_metadir_start_create(&upd);
	if (error)
		return error;

	error = xfs_metadir_create(&upd, S_IFREG);
	if (error)
		return error;

	xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE);

	error = xfs_metadir_commit(&upd);
	if (error)
		return error;

	xfs_finish_inode_setup(upd.ip);
	*ipp = upd.ip;
	return 0;
}

#ifndef __KERNEL__
/* Link a metadata directory quota inode. */
int
xfs_dqinode_metadir_link(
	struct xfs_inode	*dp,
	xfs_dqtype_t		type,
	struct xfs_inode	*ip)
{
	struct xfs_metadir_update	upd = {
		.dp			= dp,
		.metafile_type		= xfs_dqinode_metafile_type(type),
		.path			= xfs_dqinode_path(type),
		.ip			= ip,
	};
	int			error;

	error = xfs_metadir_start_link(&upd);
	if (error)
		return error;

	error = xfs_metadir_link(&upd);
	if (error)
		return error;

	xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE);
	return xfs_metadir_commit(&upd);
}
#endif /* __KERNEL__ */

/* Create the parent directory for all quota inodes and load it. */
int
xfs_dqinode_mkdir_parent(
	struct xfs_mount	*mp,
	struct xfs_inode	**dpp)
{
	if (!mp->m_metadirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	return xfs_metadir_mkdir(mp->m_metadirip, "quota", dpp);
}

/*
 * Load the parent directory of all quota inodes. Pass the inode to the
 * caller because quota functions (e.g. QUOTARM) can be called on the quota
 * files even if quotas are not enabled.
 */
int
xfs_dqinode_load_parent(
	struct xfs_trans	*tp,
	struct xfs_inode	**dpp)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!mp->m_metadirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	return xfs_metadir_load(tp, mp->m_metadirip, "quota",
			XFS_METAFILE_DIR, dpp);
}
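/*
 * Illustrative sketch (not part of the file above): the sizing and id
 * checks used by the verifiers. xfs_calc_dquots_per_chunk() is plain
 * integer division; e.g. with 512-byte basic blocks and assuming a
 * 136-byte struct xfs_dqblk (the v5 on-disk size), an 8-basic-block
 * chunk holds BBTOB(8) / 136 = 4096 / 136 = 30 dquots.
 * xfs_dquot_buf_verify() then expects dquot i of a chunk to carry id
 * "first_id + i", since ids must increase monotonically within the
 * buffer. A standalone model of that id walk:
 */
static int dquot_chunk_ids_ok(const unsigned int *ids, int ndquots)
{
	int i;

	/* Every id must follow the first one in lockstep. */
	for (i = 1; i < ndquots; i++)
		if (ids[i] != ids[0] + i)
			return 0;	/* mismatch points at dquot i */
	return 1;
}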
/*
 * net/tipc/socket.c: TIPC socket API
 *
 * Copyright (c) 2001-2007, 2012-2019, Ericsson AB
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
 * Copyright (c) 2020-2021, Red Hat Inc
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/rhashtable.h>
#include <linux/sched/signal.h>
#include <trace/events/sock.h>

#include "core.h"
#include "name_table.h"
#include "node.h"
#include "link.h"
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
#include "group.h"
#include "trace.h"

#define NAGLE_START_INIT	4
#define NAGLE_START_MAX		1024
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
#define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
#define TIPC_ACK_RATE		4	/* ACK at 1/4 of rcv window size */

enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

struct sockaddr_pair {
	struct sockaddr_tipc sock;
	struct sockaddr_tipc member;
};

/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @maxnagle: maximum size of msg which can be subject to nagle
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @probe_unacked: probe has not received ack yet
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @snd_win: send window size
 * @peer_caps: peer capabilities mask
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: receive window size
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 * @group: TIPC communications group
 * @oneway: message count in one direction (FIXME)
 * @nagle_start: current nagle value
 * @snd_backlog: send backlog count
 * @msg_acc: messages accepted; used in managing backlog and nagle
 * @pkt_cnt: TIPC socket packet count
 * @expect_ack: whether this TIPC socket is expecting an ack
 * @nodelay: setsockopt() TIPC_NODELAY setting
 * @group_is_open: TIPC socket group is fully open (FIXME)
 * @published: true if port has one or more associated names
 * @conn_addrtype: address type used when establishing connection
 */
struct tipc_sock {
	struct sock sk;
	u32 max_pkt;
	u32 maxnagle;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	atomic_t dupl_rcvcnt;
	u16 conn_timeout;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct tipc_mc_method mc_method;
	struct rcu_head rcu;
	struct tipc_group *group;
	u32 oneway;
	u32 nagle_start;
	u16 snd_backlog;
	u16 msg_acc;
	u16 pkt_cnt;
	bool expect_ack;
	bool nodelay;
	bool group_is_open;
	bool published;
	u8 conn_addrtype;
};

static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static void tipc_sk_timeout(struct timer_list *t);
static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);

static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static struct proto tipc_proto;
static const struct rhashtable_params tsk_rht_params;

static u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);
}

static u32 tsk_peer_node(struct tipc_sock *tsk)
{
	return msg_destnode(&tsk->phdr);
}

static u32 tsk_peer_port(struct tipc_sock *tsk)
{
	return msg_destport(&tsk->phdr);
}

static bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
}

static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
}
static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}

static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

int tsk_set_importance(struct sock *sk, int imp)
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
	msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
	return 0;
}

static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}

static u16 tsk_blocks(int len)
{
	return ((len / FLOWCTL_BLK_SZ) + 1);
}

/* tsk_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}
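/*
 * Illustrative sketch (not part of the file above): the block-based
 * flow control behind tsk_blocks(), tsk_adv_blocks() and tsk_inc().
 * FLOWCTL_BLK_SZ = 1024 is an assumption here (the real constant lives
 * in net/tipc/socket.h). A sender charges len/1024 + 1 blocks per
 * message; a receiver advertises only rcvbuf/1024/4 blocks, because
 * truesize(len)/len stays below 4. A 2 MB receive buffer thus yields a
 * 2097152 / 1024 / 4 = 512 block window, and tsk_conn_cong() above
 * reports congestion once more than snd_win blocks are unacked.
 */
static unsigned short model_msg_blocks(int len)
{
	return len / 1024 + 1;		/* mirrors tsk_blocks() */
}

static unsigned short model_adv_blocks(int rcvbuf)
{
	return rcvbuf / 1024 / 4;	/* mirrors tsk_adv_blocks() */
}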
/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle */
static void tsk_set_nagle(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;

	tsk->maxnagle = 0;
	if (sk->sk_type != SOCK_STREAM)
		return;
	if (tsk->nodelay)
		return;
	if (!(tsk->peer_caps & TIPC_NAGLE))
		return;
	/* Limit node local buffer size to avoid receive queue overflow */
	if (tsk->max_pkt == MAX_MSG_SIZE)
		tsk->maxnagle = 1500;
	else
		tsk->maxnagle = tsk->max_pkt;
}

/**
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
 * @sk: network socket
 *
 * Caller must hold socket lock
 */
static void tsk_advance_rx_queue(struct sock *sk)
{
	trace_tipc_sk_advance_rx(sk, NULL, TIPC_DUMP_SK_RCVQ, " ");
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}

/* tipc_sk_respond() : send response message back to sender
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 selector;
	u32 dnode;
	u32 onode = tipc_own_addr(sock_net(sk));

	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@sk_respond!");
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

/**
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
 * @sk: network socket
 * @error: response error code
 *
 * Caller must hold socket lock
 */
static void tsk_rej_rx_queue(struct sock *sk, int error)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, error);
}

static bool tipc_sk_connected(const struct sock *sk)
{
	return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED;
}

/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connectionless, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}

/* tsk_peer_msg - verify if message was sent by connected port's peer
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	u32 self = tipc_own_addr(sock_net(sk));
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node, peer_node;

	if (unlikely(!tipc_sk_connected(sk)))
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
	peer_node = tsk_peer_node(tsk);

	if (likely(orig_node == peer_node))
		return true;

	if (!orig_node && peer_node == self)
		return true;

	if (!peer_node && orig_node == self)
		return true;

	return false;
}

/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
	int oldsk_state = sk->sk_state;
	int res = -EINVAL;

	switch (state) {
	case TIPC_OPEN:
		res = 0;
		break;
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_ESTABLISHED:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_DISCONNECTING:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

#define tipc_wait_for_cond(sock_, timeo_, condition_) \
({ \
	DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
	struct sock *sk_; \
	int rc_; \
 \
	while ((rc_ = !(condition_))) { \
		/* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \
		smp_rmb(); \
		sk_ = (sock_)->sk; \
		rc_ = tipc_sk_sock_err((sock_), timeo_); \
		if (rc_) \
			break; \
		add_wait_queue(sk_sleep(sk_), &wait_); \
		release_sock(sk_); \
		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
		sched_annotate_sleep(); \
		lock_sock(sk_); \
		remove_wait_queue(sk_sleep(sk_), &wait_); \
	} \
	rc_; \
})

/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Return: 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	tsk = tipc_sk(sk);
	tsk->max_pkt = MAX_PKT_DEFAULT;
	tsk->maxnagle = 0;
	tsk->nagle_start = NAGLE_START_INIT;
	INIT_LIST_HEAD(&tsk->publications);
	INIT_LIST_HEAD(&tsk->cong_links);
	msg = &tsk->phdr;

	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
	tipc_set_sk_state(sk, TIPC_OPEN);
	if (tipc_sk_insert(tsk)) {
		sk_free(sk);
		pr_warn("Socket create failed; port number exhausted\n");
		return -EINVAL;
	}

	/* Ensure tsk is visible before we read own_addr. */
	smp_mb();

	tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
		      TIPC_NAMED_MSG, NAMED_H_SIZE, 0);

	msg_set_origport(msg, tsk->portid);
	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
	sk->sk_shutdown = 0;
	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
	sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
	sk->sk_destruct = tipc_sock_destruct;
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	tsk->group_is_open = true;
	atomic_set(&tsk->dupl_rcvcnt, 0);

	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}
	__skb_queue_head_init(&tsk->mc_method.deferredq);
	trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " ");
	return 0;
}

static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}
/* Caller should hold socket lock for the socket.
 */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

	/* Push out delayed messages if in Nagle mode */
	tipc_sk_push_backlog(tsk, false);
	/* Remove pending SYN */
	__skb_queue_purge(&sk->sk_write_queue);

	/* Remove partially received buffer if any */
	skb = skb_peek(&sk->sk_receive_queue);
	if (skb && TIPC_SKB_CB(skb)->bytes_read) {
		__skb_unlink(skb, &sk->sk_receive_queue);
		kfree_skb(skb);
	}

	/* Reject all unreceived messages if connectionless */
	if (tipc_sk_type_connectionless(sk)) {
		tsk_rej_rx_queue(sk, error);
		return;
	}

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
	case TIPC_ESTABLISHED:
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		/* Send a FIN+/- to its peer */
		skb = __skb_dequeue(&sk->sk_receive_queue);
		if (skb) {
			__skb_queue_purge(&sk->sk_receive_queue);
			tipc_sk_respond(sk, skb, error);
			break;
		}
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		break;
	case TIPC_LISTEN:
		/* Reject all SYN messages */
		tsk_rej_rx_queue(sk, error);
		break;
	default:
		__skb_queue_purge(&sk->sk_receive_queue);
		break;
	}
}

/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded. (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender! They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Return: 0 on success, errno otherwise
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed
	 * accept() releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	trace_tipc_sk_release(sk, NULL, TIPC_DUMP_ALL, " ");
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_leave(tsk);
	tipc_sk_withdraw(tsk, NULL);
	__skb_queue_purge(&tsk->mc_method.deferredq);
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);

	sock_orphan(sk);
	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
	tipc_dest_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
 * __tipc_bind - associate or disassociate TIPC name(s) with a socket
 * @sock: socket structure
 * @skaddr: socket address describing name(s) and desired operation
 * @alen: size of socket address data structure
 *
 * Name and name sequence binding are indicated using a positive scope value;
 * a negative scope value unbinds the specified name. Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
 *
 * Return: 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 * access any non-constant socket information.
 */
static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
{
	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
	struct tipc_sock *tsk = tipc_sk(sock->sk);
	bool unbind = false;

	if (unlikely(!alen))
		return tipc_sk_withdraw(tsk, NULL);

	if (ua->addrtype == TIPC_SERVICE_ADDR) {
		ua->addrtype = TIPC_SERVICE_RANGE;
		ua->sr.upper = ua->sr.lower;
	}
	if (ua->scope < 0) {
		unbind = true;
		ua->scope = -ua->scope;
	}
	/* Users may still use deprecated TIPC_ZONE_SCOPE */
	if (ua->scope != TIPC_NODE_SCOPE)
		ua->scope = TIPC_CLUSTER_SCOPE;

	if (tsk->group)
		return -EACCES;

	if (unbind)
		return tipc_sk_withdraw(tsk, ua);
	return tipc_sk_publish(tsk, ua);
}

int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
{
	int res;

	lock_sock(sock->sk);
	res = __tipc_bind(sock, skaddr, alen);
	release_sock(sock->sk);
	return res;
}

static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
{
	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
	u32 atype = ua->addrtype;

	if (alen) {
		if (!tipc_uaddr_valid(ua, alen))
			return -EINVAL;
		if (atype == TIPC_SOCKET_ADDR)
			return -EAFNOSUPPORT;
		if (ua->sr.type < TIPC_RESERVED_TYPES) {
			pr_warn_once("Can't bind to reserved service type %u\n",
				     ua->sr.type);
			return -EACCES;
		}
	}
	return tipc_sk_bind(sock, skaddr, alen);
}
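/*
 * Illustrative sketch (not part of the file above): what tipc_bind()
 * accepts from user space. A minimal program binding an RDM socket to
 * a service address; the service type 18888 and instance 17 are
 * arbitrary example values (types below TIPC_RESERVED_TYPES are
 * rejected with -EACCES by the code above, and __tipc_bind() widens
 * the single address into a one-element service range).
 *
 *	#include <sys/socket.h>
 *	#include <linux/tipc.h>
 *
 *	int main(void)
 *	{
 *		struct sockaddr_tipc srv = {
 *			.family = AF_TIPC,
 *			.addrtype = TIPC_SERVICE_ADDR,
 *			.scope = TIPC_CLUSTER_SCOPE,
 *			.addr.name.name.type = 18888,
 *			.addr.name.name.instance = 17,
 *		};
 *		int sd = socket(AF_TIPC, SOCK_RDM, 0);
 *
 *		return bind(sd, (struct sockaddr *)&srv, sizeof(srv));
 *	}
 */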
/**
 * tipc_getname - get port ID of socket or peer socket
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 *
 * Return: 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it only
 * accesses socket information that is unchanging (or which changes in
 * a completely predictable manner).
 */
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int peer)
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);

	memset(addr, 0, sizeof(*addr));
	if (peer) {
		if ((!tipc_sk_connected(sk)) &&
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
			return -ENOTCONN;
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
	} else {
		addr->addr.id.ref = tsk->portid;
		addr->addr.id.node = tipc_own_addr(sock_net(sk));
	}

	addr->addrtype = TIPC_SOCKET_ADDR;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

	return sizeof(*addr);
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: ???
 *
 * Return: pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits. TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static __poll_t tipc_poll(struct file *file, struct socket *sock,
			  poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	__poll_t revents = 0;

	sock_poll_wait(file, sock, wait);
	trace_tipc_sk_poll(sk, NULL, TIPC_DUMP_ALL, " ");

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		revents |= EPOLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			revents |= EPOLLOUT;
		fallthrough;
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
			revents |= EPOLLIN | EPOLLRDNORM;
		break;
	case TIPC_OPEN:
		if (tsk->group_is_open && !tsk->cong_link_cnt)
			revents |= EPOLLOUT;
		if (!tipc_sk_type_connectionless(sk))
			break;
		if (skb_queue_empty_lockless(&sk->sk_receive_queue))
			break;
		revents |= EPOLLIN | EPOLLRDNORM;
		break;
	case TIPC_DISCONNECTING:
		revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
		break;
	}
	return revents;
}

/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @ua: destination address struct
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Return: the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	if (tsk->group)
		return -EACCES;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, ua->sr.type);
	msg_set_namelower(hdr, ua->sr.lower);
	msg_set_nameupper(hdr, ua->sr.upper);

	/* Build message as chain of buffers */
	__skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful */
	if (unlikely(rc == dlen)) {
		trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
					TIPC_DUMP_SK_SNDQ, " ");
		rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts,
				     &tsk->cong_link_cnt);
	}

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_msg - send a message to a member in the group
 * @net: network namespace
 * @tsk: tipc socket
 * @m: message to send
 * @mb: group member
 * @dnode: destination node
 * @dport: destination port
 * @dlen: total length of message data
 */
static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
			       struct msghdr *m, struct tipc_member *mb,
			       u32 dnode, u32 dport, int dlen)
{
	u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
	struct tipc_mc_method *method = &tsk->mc_method;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct sk_buff_head pkts;
	int mtu, rc;

	/* Complete message header */
	msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, dport);
	msg_set_destnode(hdr, dnode);
	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);

	/* Build message as chain of buffers */
	__skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(&tsk->cong_links, dnode, 0);
		tsk->cong_link_cnt++;
	}

	/* Update send window */
	tipc_group_update_member(mb, blks);

	/* A broadcast sent within next EXPIRE period must follow same path */
	method->rcast = true;
	method->mandatory = true;
	return dlen;
}

/**
 * tipc_send_group_unicast - send message to a member in the group
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Return: the number of bytes sent on success, or errno
 */
static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	struct tipc_member *mb = NULL;
	u32 node, port;
	int rc;

	node = ua->sk.node;
	port = ua->sk.ref;
	if (!port && !node)
		return -EHOSTUNREACH;

	/* Block or return if destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(&tsk->cong_links, node, 0) &&
				tsk->group &&
				!tipc_group_cong(tsk->group, node, port, blks,
						 &mb));
	if (unlikely(rc))
		return rc;

	if (unlikely(!mb))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_anycast - send message to any member with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Return: the number of bytes sent on success, or errno
 */
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct list_head *cong_links = &tsk->cong_links;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_member *first = NULL;
	struct tipc_member *mbr = NULL;
	struct net *net = sock_net(sk);
	u32 node, port, exclude;
	LIST_HEAD(dsts);
	int lookups = 0;
	int dstcnt, rc;
	bool cong;

	ua->sa.type = msg_nametype(hdr);
	ua->scope = msg_lookup_scope(hdr);

	while (++lookups < 4) {
		exclude = tipc_group_exclude(tsk->group);

		first = NULL;

		/* Look for a non-congested destination member, if any */
		while (1) {
			if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt,
						       exclude, false))
				return -EHOSTUNREACH;
			tipc_dest_pop(&dsts, &node, &port);
			cong = tipc_group_cong(tsk->group, node, port, blks,
					       &mbr);
			if (!cong)
				break;
			if (mbr == first)
				break;
			if (!first)
				first = mbr;
		}

		/* Start over if destination was not in member list */
		if (unlikely(!mbr))
			continue;

		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
			break;

		/* Block or return if destination link or member is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					!tipc_dest_find(cong_links, node, 0) &&
					tsk->group &&
					!tipc_group_cong(tsk->group, node, port,
							 blks, &mbr));
		if (unlikely(rc))
			return rc;

		/* Send, unless destination disappeared while waiting */
		if (likely(mbr))
			break;
	}

	if (unlikely(lookups >= 4))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);

	return rc ? rc : dlen;
}
rc : dlen; } /** * tipc_send_group_bcast - send message to all members in communication group * @sock: socket structure * @m: message to send * @dlen: total length of message data * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_nlist *dsts; struct tipc_mc_method *method = &tsk->mc_method; bool ack = method->mandatory && method->rcast; int blks = tsk_blocks(MCAST_H_SIZE + dlen); struct tipc_msg *hdr = &tsk->phdr; int mtu = tipc_bcast_get_mtu(net); struct sk_buff_head pkts; int rc = -EHOSTUNREACH; /* Block or return if any destination link or member is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt && tsk->group && !tipc_group_bc_cong(tsk->group, blks)); if (unlikely(rc)) return rc; dsts = tipc_group_dests(tsk->group); if (!dsts->local && !dsts->remote) return -EHOSTUNREACH; /* Complete message header */ if (ua) { msg_set_type(hdr, TIPC_GRP_MCAST_MSG); msg_set_nameinst(hdr, ua->sa.instance); } else { msg_set_type(hdr, TIPC_GRP_BCAST_MSG); msg_set_nameinst(hdr, 0); } msg_set_hdr_sz(hdr, GROUP_H_SIZE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group)); /* Avoid getting stuck with repeated forced replicasts */ msg_set_grp_bc_ack_req(hdr, ack); /* Build message as chain of buffers */ __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; /* Send message */ rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt); if (unlikely(rc)) return rc; /* Update broadcast sequence number and send windows */ tipc_group_update_bc_members(tsk->group, blks, ack); /* Broadcast link is now free to choose method for next broadcast */ method->mandatory = false; method->expires = jiffies; return dlen; } /** * tipc_send_group_mcast - send message to all members with given identity * @sock: socket structure * @m: message to send * @dlen: total length of message data * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); u32 dstcnt, exclude; LIST_HEAD(dsts); ua->sa.type = msg_nametype(hdr); ua->scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true)) return -EHOSTUNREACH; if (dstcnt == 1) { tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref); return tipc_send_group_unicast(sock, m, dlen, timeout); } tipc_dest_list_purge(&dsts); return tipc_send_group_bcast(sock, m, dlen, timeout); } /** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets * @net: the associated network namespace * @arrvq: queue with arriving messages, to be cloned after destination lookup * @inputq: queue with cloned messages, delivered to socket after dest lookup * * 
Multi-threaded: parallel calls with reference to same queues may occur */ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { u32 self = tipc_own_addr(net); struct sk_buff *skb, *_skb; u32 portid, onode; struct sk_buff_head tmpq; struct list_head dports; struct tipc_msg *hdr; struct tipc_uaddr ua; int user, mtyp, hlen; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); ua.addrtype = TIPC_SERVICE_RANGE; /* tipc_skb_peek() increments the head skb's reference counter */ skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { hdr = buf_msg(skb); user = msg_user(hdr); mtyp = msg_type(hdr); hlen = skb_headroom(skb) + msg_hdr_sz(hdr); onode = msg_orignode(hdr); ua.sr.type = msg_nametype(hdr); ua.sr.lower = msg_namelower(hdr); ua.sr.upper = msg_nameupper(hdr); if (onode == self) ua.scope = TIPC_ANY_SCOPE; else ua.scope = TIPC_CLUSTER_SCOPE; if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { __skb_dequeue(arrvq); __skb_queue_tail(inputq, skb); } kfree_skb(skb); spin_unlock_bh(&inputq->lock); continue; } /* Group messages require exact scope match */ if (msg_in_group(hdr)) { ua.sr.lower = 0; ua.sr.upper = ~0; ua.scope = msg_lookup_scope(hdr); } /* Create destination port list: */ tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports); /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); continue; } pr_warn("Failed to clone mcast rcv buffer\n"); } /* Append clones to inputq only if skb is still head of arrvq */ spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); /* Decrement the skb's refcnt */ kfree_skb(__skb_dequeue(arrvq)); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); kfree_skb(skb); } tipc_sk_rcv(net, inputq); } /* tipc_sk_push_backlog(): send accumulated buffers in socket write queue * when socket is in Nagle mode */ static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack) { struct sk_buff_head *txq = &tsk->sk.sk_write_queue; struct sk_buff *skb = skb_peek_tail(txq); struct net *net = sock_net(&tsk->sk); u32 dnode = tsk_peer_node(tsk); int rc; if (nagle_ack) { tsk->pkt_cnt += skb_queue_len(txq); if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) { tsk->oneway = 0; if (tsk->nagle_start < NAGLE_START_MAX) tsk->nagle_start *= 2; tsk->expect_ack = false; pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n", tsk->portid, tsk->msg_acc, tsk->pkt_cnt, tsk->nagle_start); } else { tsk->nagle_start = NAGLE_START_INIT; if (skb) { msg_set_ack_required(buf_msg(skb)); tsk->expect_ack = true; } else { tsk->expect_ack = false; } } tsk->msg_acc = 0; tsk->pkt_cnt = 0; } if (!skb || tsk->cong_link_cnt) return; /* Do not send SYN again after congestion */ if (msg_is_syn(buf_msg(skb))) return; if (tsk->msg_acc) tsk->pkt_cnt += skb_queue_len(txq); tsk->snt_unacked += tsk->snd_backlog; tsk->snd_backlog = 0; rc = tipc_node_xmit(net, txq, dnode, tsk->portid); if (rc == -ELINKCONG) tsk->cong_link_cnt = 1; } /** * tipc_sk_conn_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket * @skb: pointer to message buffer. 
* @inputq: buffer list containing the buffers * @xmitq: output message area */ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); u32 onode = tsk_own_node(tsk); struct sock *sk = &tsk->sk; int mtyp = msg_type(hdr); bool was_cong; /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, hdr)) { trace_tipc_sk_drop_msg(sk, skb, TIPC_DUMP_NONE, "@proto_rcv!"); goto exit; } if (unlikely(msg_errcode(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); /* State change is ignored if socket already awake, * - convert msg to abort msg and add to inqueue */ msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); msg_set_type(hdr, TIPC_CONN_MSG); msg_set_size(hdr, BASIC_H_SIZE); msg_set_hdr_sz(hdr, BASIC_H_SIZE); __skb_queue_tail(inputq, skb); return; } tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); if (tipc_msg_reverse(onode, &skb, TIPC_OK)) __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { was_cong = tsk_conn_cong(tsk); tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr)); tsk->snt_unacked -= msg_conn_ack(hdr); if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) tsk->snd_win = msg_adv_win(hdr); if (was_cong && !tsk_conn_cong(tsk)) sk->sk_write_space(sk); } else if (mtyp != CONN_PROBE_REPLY) { pr_warn("Received unknown CONN_PROTO msg\n"); } exit: kfree_skb(skb); } /** * tipc_sendmsg - send message in connectionless manner * @sock: socket structure * @m: message to send * @dsz: amount of user data to be sent * * Message must have a destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
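 *
 * Hedged user-space sketch (not part of this file): one datagram sent to a
 * service address on a SOCK_RDM socket; the type and instance values below
 * are invented for illustration, and "sd", "buf" and "len" are placeholders:
 *
 *	struct sockaddr_tipc dst = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_SERVICE_ADDR,
 *		.addr.name.name.type = 18888,
 *		.addr.name.name.instance = 17,
 *	};
 *
 *	sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));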
* * Return: the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; int ret; lock_sock(sk); ret = __tipc_sendmsg(sock, m, dsz); release_sock(sk); return ret; } static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_socket_addr skaddr; struct sk_buff_head pkts; int atype, mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; if (ua) { if (!tipc_uaddr_valid(ua, m->msg_namelen)) return -EINVAL; atype = ua->addrtype; } /* If socket belongs to a communication group follow other paths */ if (grp) { if (!ua) return tipc_send_group_bcast(sock, m, dlen, timeout); if (atype == TIPC_SERVICE_ADDR) return tipc_send_group_anycast(sock, m, dlen, timeout); if (atype == TIPC_SOCKET_ADDR) return tipc_send_group_unicast(sock, m, dlen, timeout); if (atype == TIPC_SERVICE_RANGE) return tipc_send_group_mcast(sock, m, dlen, timeout); return -EINVAL; } if (!ua) { ua = (struct tipc_uaddr *)&tsk->peer; if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; atype = ua->addrtype; } if (unlikely(syn)) { if (sk->sk_state == TIPC_LISTEN) return -EPIPE; if (sk->sk_state != TIPC_OPEN) return -EISCONN; if (tsk->published) return -EOPNOTSUPP; if (atype == TIPC_SERVICE_ADDR) tsk->conn_addrtype = atype; msg_set_syn(hdr, 1); } memset(&skaddr, 0, sizeof(skaddr)); /* Determine destination */ if (atype == TIPC_SERVICE_RANGE) { return tipc_sendmcast(sock, ua, m, dlen, timeout); } else if (atype == TIPC_SERVICE_ADDR) { skaddr.node = ua->lookup_node; ua->scope = tipc_node2scope(skaddr.node); if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr)) return -EHOSTUNREACH; } else if (atype == TIPC_SOCKET_ADDR) { skaddr = ua->sk; } else { return -EINVAL; } /* Block or return if destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tipc_dest_find(clinks, skaddr.node, 0)); if (unlikely(rc)) return rc; /* Finally build message header */ msg_set_destnode(hdr, skaddr.node); msg_set_destport(hdr, skaddr.ref); if (atype == TIPC_SERVICE_ADDR) { msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); msg_set_nametype(hdr, ua->sa.type); msg_set_nameinst(hdr, ua->sa.instance); msg_set_lookup_scope(hdr, ua->scope); } else { /* TIPC_SOCKET_ADDR */ msg_set_type(hdr, TIPC_DIRECT_MSG); msg_set_lookup_scope(hdr, 0); msg_set_hdr_sz(hdr, BASIC_H_SIZE); } /* Add message body */ __skb_queue_head_init(&pkts); mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) { __skb_queue_purge(&pkts); return -ENOMEM; } /* Send message */ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid); if (unlikely(rc == -ELINKCONG)) { tipc_dest_push(clinks, skaddr.node, 0); tsk->cong_link_cnt++; rc = 0; } if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); if (dlen && timeout) { timeout = msecs_to_jiffies(timeout); tipc_wait_for_connect(sock, &timeout); } } return rc ? 
rc : dlen; } /** * tipc_sendstream - send stream-oriented data * @sock: socket structure * @m: data to send * @dsz: total length of data to be transmitted * * Used for SOCK_STREAM data. * * Return: the number of bytes sent on success (or partial success), * or errno if no data sent */ static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; int ret; lock_sock(sk); ret = __tipc_sendstream(sock, m, dsz); release_sock(sk); return ret; } static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct sk_buff_head *txq = &sk->sk_write_queue; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); struct sk_buff *skb; u32 dnode = tsk_peer_node(tsk); int maxnagle = tsk->maxnagle; int maxpkt = tsk->max_pkt; int send, sent = 0; int blocks, rc = 0; if (unlikely(dlen > INT_MAX)) return -EMSGSIZE; /* Handle implicit connection setup */ if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { rc = __tipc_sendmsg(sock, m, dlen); if (dlen && dlen == rc) { tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); } return rc; } do { rc = tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && !tsk_conn_cong(tsk) && tipc_sk_connected(sk))); if (unlikely(rc)) break; send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); blocks = tsk->snd_backlog; if (tsk->oneway++ >= tsk->nagle_start && maxnagle && send <= maxnagle) { rc = tipc_msg_append(hdr, m, send, maxnagle, txq); if (unlikely(rc < 0)) break; blocks += rc; tsk->msg_acc++; if (blocks <= 64 && tsk->expect_ack) { tsk->snd_backlog = blocks; sent += send; break; } else if (blocks > 64) { tsk->pkt_cnt += skb_queue_len(txq); } else { skb = skb_peek_tail(txq); if (skb) { msg_set_ack_required(buf_msg(skb)); tsk->expect_ack = true; } else { tsk->expect_ack = false; } tsk->msg_acc = 0; tsk->pkt_cnt = 0; } } else { rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq); if (unlikely(rc != send)) break; blocks += tsk_inc(tsk, send + MIN_H_SIZE); } trace_tipc_sk_sendstream(sk, skb_peek(txq), TIPC_DUMP_SK_SNDQ, " "); rc = tipc_node_xmit(net, txq, dnode, tsk->portid); if (unlikely(rc == -ELINKCONG)) { tsk->cong_link_cnt = 1; rc = 0; } if (likely(!rc)) { tsk->snt_unacked += blocks; tsk->snd_backlog = 0; sent += send; } } while (sent < dlen && !rc); return sent ? sent : rc; } /** * tipc_send_packet - send a connection-oriented message * @sock: socket structure * @m: message to send * @dsz: length of data to be transmitted * * Used for SOCK_SEQPACKET messages. 
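 *
 * Illustrative sketch (user space, placeholder fd "sd"): on a connected
 * SOCK_SEQPACKET socket every send() is delivered as one message, and an
 * oversized send is rejected up front rather than split:
 *
 *	if (send(sd, buf, len, 0) < 0 && errno == EMSGSIZE)
 *		fprintf(stderr, "message exceeds TIPC max user message size\n");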
* * Return: the number of bytes sent on success, or errno otherwise */ static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; return tipc_sendstream(sock, m, dsz); } /* tipc_sk_finish_conn - complete the setup of a connection */ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; msg_set_syn(msg, 0); msg_set_destnode(msg, peer_node); msg_set_destport(msg, peer_port); msg_set_type(msg, TIPC_CONN_MSG); msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); tsk_set_nagle(tsk); __skb_queue_purge(&sk->sk_write_queue); if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) return; /* Fall back to message based flow control */ tsk->rcv_win = FLOWCTL_MSG_WIN; tsk->snd_win = FLOWCTL_MSG_WIN; } /** * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info * @skb: received message * * Note: Address is not captured if not requested by receiver. */ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) { DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name); struct tipc_msg *hdr = buf_msg(skb); if (!srcaddr) return; srcaddr->sock.family = AF_TIPC; srcaddr->sock.addrtype = TIPC_SOCKET_ADDR; srcaddr->sock.scope = 0; srcaddr->sock.addr.id.ref = msg_origport(hdr); srcaddr->sock.addr.id.node = msg_orignode(hdr); srcaddr->sock.addr.name.domain = 0; m->msg_namelen = sizeof(struct sockaddr_tipc); if (!msg_in_group(hdr)) return; /* Group message users may also want to know sending member's id */ srcaddr->member.family = AF_TIPC; srcaddr->member.addrtype = TIPC_SERVICE_ADDR; srcaddr->member.scope = 0; srcaddr->member.addr.name.name.type = msg_nametype(hdr); srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; srcaddr->member.addr.name.domain = 0; m->msg_namelen = sizeof(*srcaddr); } /** * tipc_sk_anc_data_recv - optionally capture ancillary data for received message * @m: descriptor for message info * @skb: received message buffer * @tsk: TIPC port associated with message * * Note: Ancillary data is not captured if not requested by receiver. 
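 *
 * Hedged user-space sketch (placeholder names): supplying msg_control so
 * that this ancillary data is actually requested, then walking the cmsgs:
 *
 *	struct msghdr m = { .msg_iov = &iov, .msg_iovlen = 1,
 *			    .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm;
 *
 *	if (recvmsg(sd, &m, 0) >= 0)
 *		for (cm = CMSG_FIRSTHDR(&m); cm; cm = CMSG_NXTHDR(&m, cm))
 *			if (cm->cmsg_level == SOL_TIPC &&
 *			    cm->cmsg_type == TIPC_DESTNAME)
 *				use_dest_name(CMSG_DATA(cm));
 *
 * TIPC_ERRINFO and TIPC_RETDATA, produced below for errored messages, can
 * be matched the same way; "sd", "iov", "cbuf" and use_dest_name() are
 * placeholders.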
* * Return: 0 if successful, otherwise errno */ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, struct tipc_sock *tsk) { struct tipc_msg *hdr; u32 data[3] = {0,}; bool has_addr; int dlen, rc; if (likely(m->msg_controllen == 0)) return 0; hdr = buf_msg(skb); dlen = msg_data_sz(hdr); /* Capture errored message object, if any */ if (msg_errcode(hdr)) { if (skb_linearize(skb)) return -ENOMEM; hdr = buf_msg(skb); data[0] = msg_errcode(hdr); data[1] = dlen; rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data); if (rc || !dlen) return rc; rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr)); if (rc) return rc; } /* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */ switch (msg_type(hdr)) { case TIPC_NAMED_MSG: has_addr = true; data[0] = msg_nametype(hdr); data[1] = msg_namelower(hdr); data[2] = data[1]; break; case TIPC_MCAST_MSG: has_addr = true; data[0] = msg_nametype(hdr); data[1] = msg_namelower(hdr); data[2] = msg_nameupper(hdr); break; case TIPC_CONN_MSG: has_addr = !!tsk->conn_addrtype; data[0] = msg_nametype(&tsk->phdr); data[1] = msg_nameinst(&tsk->phdr); data[2] = data[1]; break; default: has_addr = false; } if (!has_addr) return 0; return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data); } static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); if (!tipc_sk_connected(sk)) return NULL; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_port, tsk->portid, TIPC_OK); if (!skb) return NULL; msg = buf_msg(skb); msg_set_conn_ack(msg, tsk->rcv_unacked); tsk->rcv_unacked = 0; /* Adjust to and advertise the correct window limit */ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) { tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); msg_set_adv_win(msg, tsk->rcv_win); } return skb; } static void tipc_sk_send_ack(struct tipc_sock *tsk) { struct sk_buff *skb; skb = tipc_sk_build_ack(tsk); if (!skb) return; tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk), msg_link_selector(buf_msg(skb))); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; DEFINE_WAIT_FUNC(wait, woken_wake_function); long timeo = *timeop; int err = sock_error(sk); if (err) return err; for (;;) { if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sk->sk_shutdown & RCV_SHUTDOWN) { err = -ENOTCONN; break; } add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); sched_annotate_sleep(); lock_sock(sk); remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; err = -EAGAIN; if (!timeo) break; err = sock_intr_errno(timeo); if (signal_pending(current)) break; err = sock_error(sk); if (err) break; } *timeop = timeo; return err; } /** * tipc_recvmsg - receive packet-oriented message * @sock: network socket * @m: descriptor for message info * @buflen: length of user buffer area * @flags: receive flags * * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. * If the complete message doesn't fit in user area, truncate it.
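 *
 * Illustrative user-space sketch ("sd" and "iov" are placeholders for an
 * open socket and a buffer vector): such truncation is visible through the
 * MSG_TRUNC flag set below:
 *
 *	struct msghdr m = { .msg_iov = &iov, .msg_iovlen = 1 };
 *	ssize_t n = recvmsg(sd, &m, 0);
 *
 *	if (n >= 0 && (m.msg_flags & MSG_TRUNC))
 *		fprintf(stderr, "message tail was discarded\n");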
* * Return: size of returned message data, errno otherwise */ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buflen, int flags) { struct sock *sk = sock->sk; bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; bool grp_evt; long timeout; /* Catch invalid receive requests */ if (unlikely(!buflen)) return -EINVAL; lock_sock(sk); if (unlikely(connected && sk->sk_state == TIPC_OPEN)) { rc = -ENOTCONN; goto exit; } timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); /* Step rcv queue to first msg with data or error; wait if necessary */ do { rc = tipc_wait_for_rcvmsg(sock, &timeout); if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); err = msg_errcode(hdr); grp_evt = msg_is_grp_evt(hdr); if (likely(dlen || err)) break; tsk_advance_rx_queue(sk); } while (1); /* Collect msg meta data, including error code and rejected data */ tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, skb, tsk); if (unlikely(rc)) goto exit; hdr = buf_msg(skb); /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { int offset = skb_cb->bytes_read; copy = min_t(int, dlen - offset, buflen); rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); if (unlikely(rc)) goto exit; if (unlikely(offset + copy < dlen)) { if (flags & MSG_EOR) { if (!(flags & MSG_PEEK)) skb_cb->bytes_read = offset + copy; } else { m->msg_flags |= MSG_TRUNC; skb_cb->bytes_read = 0; } } else { if (flags & MSG_EOR) m->msg_flags |= MSG_EOR; skb_cb->bytes_read = 0; } } else { copy = 0; rc = 0; if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; goto exit; } } /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { if (msg_grp_evt(hdr) == TIPC_WITHDRAWN) m->msg_flags |= MSG_EOR; m->msg_flags |= MSG_OOB; copy = 0; } /* Capture of data or error code/rejected data was successful */ if (unlikely(flags & MSG_PEEK)) goto exit; /* Send group flow control advertisement when applicable */ if (tsk->group && msg_in_group(hdr) && !grp_evt) { __skb_queue_head_init(&xmitq); tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), msg_orignode(hdr), msg_origport(hdr), &xmitq); tipc_node_distr_xmit(sock_net(sk), &xmitq); } if (skb_cb->bytes_read) goto exit; tsk_advance_rx_queue(sk); if (likely(!connected)) goto exit; /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); exit: release_sock(sk); return rc ? rc : copy; } /** * tipc_recvstream - receive stream-oriented data * @sock: network socket * @m: descriptor for message info * @buflen: total size of user buffer area * @flags: receive flags * * Used for SOCK_STREAM messages only. If not enough data is available, * it will optionally wait for more; data is never truncated.
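 *
 * Illustrative user-space sketch ("sd", "buf" and "want" are placeholders
 * for a connected SOCK_STREAM socket, a buffer, and a byte count):
 * MSG_WAITALL asks this routine to keep collecting data until "want"
 * bytes, an error, or end of stream:
 *
 *	ssize_t n = recv(sd, buf, want, MSG_WAITALL);
 *
 *	if (n == 0)
 *		handle_peer_shutdown();
 *
 * handle_peer_shutdown() is a placeholder for the application's own logic.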
* * Return: size of returned message data, errno otherwise */ static int tipc_recvstream(struct socket *sock, struct msghdr *m, size_t buflen, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; struct tipc_msg *hdr; struct tipc_skb_cb *skb_cb; bool peek = flags & MSG_PEEK; int offset, required, copy, copied = 0; int hlen, dlen, err, rc; long timeout; /* Catch invalid receive attempts */ if (unlikely(!buflen)) return -EINVAL; lock_sock(sk); if (unlikely(sk->sk_state == TIPC_OPEN)) { rc = -ENOTCONN; goto exit; } required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { /* Look at first msg in receive queue; wait if necessary */ rc = tipc_wait_for_rcvmsg(sock, &timeout); if (unlikely(rc)) break; skb = skb_peek(&sk->sk_receive_queue); skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); err = msg_errcode(hdr); /* Discard any empty non-errored (SYN-) message */ if (unlikely(!dlen && !err)) { tsk_advance_rx_queue(sk); continue; } /* Collect msg meta data, incl. error code and rejected data */ if (!copied) { tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, skb, tsk); if (rc) break; hdr = buf_msg(skb); } /* Copy data if msg ok, otherwise return error/partial data */ if (likely(!err)) { offset = skb_cb->bytes_read; copy = min_t(int, dlen - offset, buflen - copied); rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); if (unlikely(rc)) break; copied += copy; offset += copy; if (unlikely(offset < dlen)) { if (!peek) skb_cb->bytes_read = offset; break; } } else { rc = 0; if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control) rc = -ECONNRESET; if (copied || rc) break; } if (unlikely(peek)) break; tsk_advance_rx_queue(sk); /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); /* Exit if all requested data or FIN/error received */ if (copied == buflen || err) break; } while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required); exit: release_sock(sk); return copied ? 
copied : rc; } /** * tipc_write_space - wake up thread if port congestion is released * @sk: socket */ static void tipc_write_space(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); rcu_read_unlock(); } /** * tipc_data_ready - wake up threads to indicate messages have been received * @sk: socket */ static void tipc_data_ready(struct sock *sk) { struct socket_wq *wq; trace_sk_data_ready(sk); rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); rcu_read_unlock(); } static void tipc_sock_destruct(struct sock *sk) { __skb_queue_purge(&sk->sk_receive_queue); } static void tipc_sk_proto_rcv(struct sock *sk, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct sk_buff *skb = __skb_dequeue(inputq); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); struct tipc_group *grp = tsk->group; bool wakeup = false; switch (msg_user(hdr)) { case CONN_MANAGER: tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); /* coupled with smp_rmb() in tipc_wait_for_cond() */ smp_wmb(); tsk->cong_link_cnt--; wakeup = true; tipc_sk_push_backlog(tsk, false); break; case GROUP_PROTOCOL: tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); break; case TOP_SRV: tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, hdr, inputq, xmitq); break; default: break; } if (wakeup) sk->sk_write_space(sk); kfree_skb(skb); } /** * tipc_sk_filter_connect - check incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. 
* @xmitq: for Nagle ACK if any * Return: true if message should be added to receive queue, false otherwise */ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); bool con_msg = msg_connected(hdr); u32 pport = tsk_peer_port(tsk); u32 pnode = tsk_peer_node(tsk); u32 oport = msg_origport(hdr); u32 onode = msg_orignode(hdr); int err = msg_errcode(hdr); unsigned long delay; if (unlikely(msg_mcast(hdr))) return false; tsk->oneway = 0; switch (sk->sk_state) { case TIPC_CONNECTING: /* Setup ACK */ if (likely(con_msg)) { if (err) break; tipc_sk_finish_conn(tsk, oport, onode); msg_set_importance(&tsk->phdr, msg_importance(hdr)); /* ACK+ message with data is added to receive queue */ if (msg_data_sz(hdr)) return true; /* Empty ACK-, - wake up sleeping connect() and drop */ sk->sk_state_change(sk); msg_set_dest_droppable(hdr, 1); return false; } /* Ignore connectionless message if not from listening socket */ if (oport != pport || onode != pnode) return false; /* Rejected SYN */ if (err != TIPC_ERR_OVERLOAD) break; /* Prepare for new setup attempt if we have a SYN clone */ if (skb_queue_empty(&sk->sk_write_queue)) break; get_random_bytes(&delay, 2); delay %= (tsk->conn_timeout / 4); delay = msecs_to_jiffies(delay + 100); sk_reset_timer(sk, &sk->sk_timer, jiffies + delay); return false; case TIPC_OPEN: case TIPC_DISCONNECTING: return false; case TIPC_LISTEN: /* Accept only SYN message */ if (!msg_is_syn(hdr) && tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT) return false; if (!con_msg && !err) return true; return false; case TIPC_ESTABLISHED: if (!skb_queue_empty(&sk->sk_write_queue)) tipc_sk_push_backlog(tsk, false); /* Accept only connection-based messages sent by peer */ if (likely(con_msg && !err && pport == oport && pnode == onode)) { if (msg_ack_required(hdr)) { struct sk_buff *skb; skb = tipc_sk_build_ack(tsk); if (skb) { msg_set_nagle_ack(buf_msg(skb)); __skb_queue_tail(xmitq, skb); } } return true; } if (!tsk_peer_msg(tsk, hdr)) return false; if (!err) return true; tipc_set_sk_state(sk, TIPC_DISCONNECTING); tipc_node_remove_conn(net, pnode, tsk->portid); sk->sk_state_change(sk); return true; default: pr_err("Unknown sk_state %u\n", sk->sk_state); } /* Abort connection setup attempt */ tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; sk->sk_state_change(sk); return true; } /** * rcvbuf_limit - get proper overload limit of socket receive queue * @sk: socket * @skb: message * * For connection oriented messages, irrespective of importance, * default queue limit is 2 MB. * * For connectionless messages, queue limits are based on message * importance as follows: * * TIPC_LOW_IMPORTANCE (2 MB) * TIPC_MEDIUM_IMPORTANCE (4 MB) * TIPC_HIGH_IMPORTANCE (8 MB) * TIPC_CRITICAL_IMPORTANCE (16 MB) * * Return: overload limit according to corresponding message importance */ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) { struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); if (unlikely(msg_in_group(hdr))) return READ_ONCE(sk->sk_rcvbuf); if (unlikely(!msg_connected(hdr))) return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr); if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) return READ_ONCE(sk->sk_rcvbuf); return FLOWCTL_MSG_LIM; } /** * tipc_sk_filter_rcv - validate incoming message * @sk: socket * @skb: pointer to message. 
* @xmitq: output message area (FIXME) * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * * Called with socket lock already taken */ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *xmitq) { bool sk_conn = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); TIPC_SKB_CB(skb)->bytes_read = 0; __skb_queue_head_init(&inputq); __skb_queue_tail(&inputq, skb); if (unlikely(!msg_isdata(hdr))) tipc_sk_proto_rcv(sk, &inputq, xmitq); if (unlikely(grp)) tipc_group_filter_msg(grp, &inputq, xmitq); if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq); /* Validate and add to receive buffer if there is space */ while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); limit = rcvbuf_limit(sk, skb); if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) || (!sk_conn && msg_connected(hdr)) || (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload2!"); atomic_inc(&sk->sk_drops); err = TIPC_ERR_OVERLOAD; } if (unlikely(err)) { if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) { trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@filter_rcv!"); __skb_queue_tail(xmitq, skb); } err = TIPC_OK; continue; } __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); trace_tipc_sk_overlimit2(sk, skb, TIPC_DUMP_ALL, "rcvq >90% allocated!"); sk->sk_data_ready(sk); } } /** * tipc_sk_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @skb: message * * Caller must hold socket lock */ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int before = sk_rmem_alloc_get(sk); struct sk_buff_head xmitq; unsigned int added; __skb_queue_head_init(&xmitq); tipc_sk_filter_rcv(sk, skb, &xmitq); added = sk_rmem_alloc_get(sk) - before; atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); /* Send pending response/rejected messages, if any */ tipc_node_distr_xmit(sock_net(sk), &xmitq); return 0; } /** * tipc_sk_enqueue - extract all buffers with destination 'dport' from * inputq and try adding them to socket or backlog queue * @inputq: list of incoming buffers with potentially different destinations * @sk: socket where the buffers should be enqueued * @dport: port number for the socket * @xmitq: output queue * * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; u32 onode; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) return; skb = tipc_skb_dequeue(inputq, dport); if (unlikely(!skb)) return; /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { tipc_sk_filter_rcv(sk, skb, xmitq); continue; } /* Try backlog, compensating for double-counted bytes */ dcnt = &tipc_sk(sk)->dupl_rcvcnt; if (!sk->sk_backlog.len) atomic_set(dcnt, 0); lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); if (likely(!sk_add_backlog(sk, skb, lim))) { trace_tipc_sk_overlimit1(sk, skb, TIPC_DUMP_ALL, 
"bklg & rcvq >90% allocated!"); continue; } trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!"); /* Overload => reject message back to sender */ onode = tipc_own_addr(sock_net(sk)); atomic_inc(&sk->sk_drops); if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) { trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL, "@sk_enqueue!"); __skb_queue_tail(xmitq, skb); } break; } } /** * tipc_sk_rcv - handle a chain of incoming buffers * @net: the associated network namespace * @inputq: buffer list containing the buffers * Consumes all buffers in list until inputq is empty * Note: may be called in multiple threads referring to the same queue */ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { struct sk_buff_head xmitq; u32 dnode, dport = 0; int err; struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb; __skb_queue_head_init(&xmitq); while (skb_queue_len(inputq)) { dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { tipc_sk_enqueue(inputq, sk, dport, &xmitq); spin_unlock_bh(&sk->sk_lock.slock); } /* Send pending response/rejected messages, if any */ tipc_node_distr_xmit(sock_net(sk), &xmitq); sock_put(sk); continue; } /* No destination socket => dequeue skb if still there */ skb = tipc_skb_dequeue(inputq, dport); if (!skb) return; /* Try secondary lookup if unresolved named message */ err = TIPC_ERR_NO_PORT; if (tipc_msg_lookup_dest(net, skb, &err)) goto xmit; /* Prepare for message rejection */ if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) continue; trace_tipc_sk_rej_msg(NULL, skb, TIPC_DUMP_NONE, "@sk_rcv!"); xmit: dnode = msg_destnode(buf_msg(skb)); tipc_node_xmit_skb(net, skb, dnode, dport); } } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; int done; do { int err = sock_error(sk); if (err) return err; if (!*timeo_p) return -ETIMEDOUT; if (signal_pending(current)) return sock_intr_errno(*timeo_p); if (sk->sk_state == TIPC_DISCONNECTING) break; add_wait_queue(sk_sleep(sk), &wait); done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk), &wait); remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) { if (addr->family != AF_TIPC) return false; if (addr->addrtype == TIPC_SERVICE_RANGE) return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); return (addr->addrtype == TIPC_SERVICE_ADDR || addr->addrtype == TIPC_SOCKET_ADDR); } /** * tipc_connect - establish a connection to another TIPC port * @sock: socket structure * @dest: socket address for destination port * @destlen: size of socket address data structure * @flags: file-related flags associated with socket * * Return: 0 on success, errno otherwise */ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int destlen, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; long timeout = (flags & O_NONBLOCK) ? 
0 : tsk->conn_timeout; int previous; int res = 0; if (destlen != sizeof(struct sockaddr_tipc)) return -EINVAL; lock_sock(sk); if (tsk->group) { res = -EINVAL; goto exit; } if (dst->family == AF_UNSPEC) { memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); if (!tipc_sk_type_connectionless(sk)) res = -EINVAL; goto exit; } if (!tipc_sockaddr_is_sane(dst)) { res = -EINVAL; goto exit; } /* DGRAM/RDM connect(), just save the destaddr */ if (tipc_sk_type_connectionless(sk)) { memcpy(&tsk->peer, dest, destlen); goto exit; } else if (dst->addrtype == TIPC_SERVICE_RANGE) { res = -EINVAL; goto exit; } previous = sk->sk_state; switch (sk->sk_state) { case TIPC_OPEN: /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); /* If connect() is non-blocking, set MSG_DONTWAIT so that * __tipc_sendmsg() never blocks. */ if (!timeout) m.msg_flags = MSG_DONTWAIT; res = __tipc_sendmsg(sock, &m, 0); if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; /* We have just entered TIPC_CONNECTING state; the only * difference is that the return value in the non-blocking * case is EINPROGRESS rather than EALREADY. */ res = -EINPROGRESS; fallthrough; case TIPC_CONNECTING: if (!timeout) { if (previous == TIPC_CONNECTING) res = -EALREADY; goto exit; } timeout = msecs_to_jiffies(timeout); /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ res = tipc_wait_for_connect(sock, &timeout); break; case TIPC_ESTABLISHED: res = -EISCONN; break; default: res = -EINVAL; } exit: release_sock(sk); return res; } /** * tipc_listen - allow socket to listen for incoming connections * @sock: socket structure * @len: (unused) * * Return: 0 on success, errno otherwise */ static int tipc_listen(struct socket *sock, int len) { struct sock *sk = sock->sk; int res; lock_sock(sk); res = tipc_set_sk_state(sk, TIPC_LISTEN); release_sock(sk); return res; } static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only * one process gets woken up, not the 'whole herd'. * Since we do not 'race & poll' for established sockets * anymore, the common case will execute the loop only once.
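 *
 * Illustrative user-space counterpart (placeholder names) of the listener
 * loop this wake-one scheme serves; the backlog argument is unused by TIPC:
 *
 *	listen(sd, 0);
 *	for (;;) {
 *		int peer = accept(sd, NULL, NULL);
 *
 *		if (peer >= 0)
 *			serve(peer);
 *	}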
*/ for (;;) { if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; err = -EAGAIN; if (!timeo) break; err = sock_intr_errno(timeo); if (signal_pending(current)) break; } return err; } /** * tipc_accept - wait for connection request * @sock: listening socket * @new_sock: new socket that is to be connected * @arg: arguments for accept * * Return: 0 on success, errno otherwise */ static int tipc_accept(struct socket *sock, struct socket *new_sock, struct proto_accept_arg *arg) { struct sock *new_sk, *sk = sock->sk; struct tipc_sock *new_tsock; struct msghdr m = {NULL,}; struct tipc_msg *msg; struct sk_buff *buf; long timeo; int res; lock_sock(sk); if (sk->sk_state != TIPC_LISTEN) { res = -EINVAL; goto exit; } timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); res = tipc_wait_for_accept(sock, timeo); if (res) goto exit; buf = skb_peek(&sk->sk_receive_queue); res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); msg = buf_msg(buf); /* we lock on new_sk; but lockdep sees the lock on sk */ lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING); /* * Reject any stray messages received by new socket * before the socket lock was taken (very, very unlikely) */ tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT); /* Connect new socket to its peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); tsk_set_importance(new_sk, msg_importance(msg)); if (msg_named(msg)) { new_tsock->conn_addrtype = TIPC_SERVICE_ADDR; msg_set_nametype(&new_tsock->phdr, msg_nametype(msg)); msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg)); } /* * Respond to 'SYN-' by discarding it & returning 'ACK'. * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. */ if (!msg_data_sz(msg)) { tsk_advance_rx_queue(sk); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: release_sock(sk); return res; } /** * tipc_shutdown - shutdown socket connection * @sock: socket structure * @how: direction to close (must be SHUT_RDWR) * * Terminates connection (if necessary), then purges socket's receive queue. * * Return: 0 on success, errno otherwise */ static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int res; if (how != SHUT_RDWR) return -EINVAL; lock_sock(sk); trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ __skb_queue_purge(&sk->sk_receive_queue); res = 0; } else { res = -ENOTCONN; } /* Wake up anyone sleeping in poll.
*/ sk->sk_state_change(sk); release_sock(sk); return res; } static void tipc_sk_check_probing_state(struct sock *sk, struct sk_buff_head *list) { struct tipc_sock *tsk = tipc_sk(sk); u32 pnode = tsk_peer_node(tsk); u32 pport = tsk_peer_port(tsk); u32 self = tsk_own_node(tsk); u32 oport = tsk->portid; struct sk_buff *skb; if (tsk->probe_unacked) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNABORTED; tipc_node_remove_conn(sock_net(sk), pnode, pport); sk->sk_state_change(sk); return; } /* Prepare new probe */ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, pnode, self, pport, oport, TIPC_OK); if (skb) __skb_queue_tail(list, skb); tsk->probe_unacked = true; sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); } static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) { struct tipc_sock *tsk = tipc_sk(sk); /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { sk_reset_timer(sk, &sk->sk_timer, jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ tipc_msg_skb_clone(&sk->sk_write_queue, list); } static void tipc_sk_timeout(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct tipc_sock *tsk = tipc_sk(sk); u32 pnode = tsk_peer_node(tsk); struct sk_buff_head list; int rc = 0; __skb_queue_head_init(&list); bh_lock_sock(sk); /* Try again later if socket is busy */ if (sock_owned_by_user(sk)) { sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); bh_unlock_sock(sk); sock_put(sk); return; } if (sk->sk_state == TIPC_ESTABLISHED) tipc_sk_check_probing_state(sk, &list); else if (sk->sk_state == TIPC_CONNECTING) tipc_sk_retry_connect(sk, &list); bh_unlock_sock(sk); if (!skb_queue_empty(&list)) rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid); /* SYN messages may cause link congestion */ if (rc == -ELINKCONG) { tipc_dest_push(&tsk->cong_links, pnode, 0); tsk->cong_link_cnt = 1; } sock_put(sk); } static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_socket_addr skaddr; struct publication *p; u32 key; if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) return -EADDRINUSE; skaddr.ref = tsk->portid; skaddr.node = tipc_own_addr(net); p = tipc_nametbl_publish(net, ua, &skaddr, key); if (unlikely(!p)) return -EINVAL; list_add(&p->binding_sock, &tsk->publications); tsk->pub_count++; tsk->published = true; return 0; } static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct net *net = sock_net(&tsk->sk); struct publication *safe, *p; struct tipc_uaddr _ua; int rc = -EINVAL; list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) { if (!ua) { tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type, p->sr.lower, p->sr.upper); tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key); continue; } /* Unbind specific publication */ if (p->scope != ua->scope) continue; if (p->sr.type != ua->sr.type) continue; if (p->sr.lower != ua->sr.lower) continue; if (p->sr.upper != ua->sr.upper) break; tipc_nametbl_withdraw(net, ua, &p->sk, p->key); rc = 0; break; } if (list_empty(&tsk->publications)) { tsk->published = 0; rc = 0; } return rc; } /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. 
*/ void tipc_sk_reinit(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct rhashtable_iter iter; struct tipc_sock *tsk; struct tipc_msg *msg; rhashtable_walk_enter(&tn->sk_rht, &iter); do { rhashtable_walk_start(&iter); while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { sock_hold(&tsk->sk); rhashtable_walk_stop(&iter); lock_sock(&tsk->sk); msg = &tsk->phdr; msg_set_prevnode(msg, tipc_own_addr(net)); msg_set_orignode(msg, tipc_own_addr(net)); release_sock(&tsk->sk); rhashtable_walk_start(&iter); sock_put(&tsk->sk); } rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); rhashtable_walk_exit(&iter); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk; rcu_read_lock(); tsk = rhashtable_lookup(&tn->sk_rht, &portid, tsk_rht_params); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); return tsk; } static int tipc_sk_insert(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_net *tn = net_generic(net, tipc_net_id); u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; u32 portid = get_random_u32_below(remaining) + TIPC_MIN_PORT; while (remaining--) { portid++; if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) portid = TIPC_MIN_PORT; tsk->portid = portid; sock_hold(&tsk->sk); if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) return 0; sock_put(&tsk->sk); } return -1; } static void tipc_sk_remove(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } static const struct rhashtable_params tsk_rht_params = { .nelem_hint = 192, .head_offset = offsetof(struct tipc_sock, node), .key_offset = offsetof(struct tipc_sock, portid), .key_len = sizeof(u32), /* portid */ .max_size = 1048576, .min_size = 256, .automatic_shrinking = true, }; int tipc_sk_rht_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); return rhashtable_init(&tn->sk_rht, &tsk_rht_params); } void tipc_sk_rht_destroy(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); /* Wait for socket readers to complete */ synchronize_net(); rhashtable_destroy(&tn->sk_rht); } static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) { struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_uaddr ua; int rc; if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; if (mreq->scope > TIPC_NODE_SCOPE) return -EINVAL; if (mreq->scope != TIPC_NODE_SCOPE) mreq->scope = TIPC_CLUSTER_SCOPE; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); if (!grp) return -ENOMEM; tsk->group = grp; msg_set_lookup_scope(hdr, mreq->scope); msg_set_nametype(hdr, mreq->type); msg_set_dest_droppable(hdr, true); tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope, mreq->type, mreq->instance, mreq->instance); tipc_nametbl_build_group(net, grp, &ua); rc = tipc_sk_publish(tsk, &ua); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; return rc; } /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); return rc; } static int tipc_sk_leave(struct tipc_sock *tsk) { struct net *net = 
sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_uaddr ua; int scope; if (!grp) return -EINVAL; ua.addrtype = TIPC_SERVICE_RANGE; tipc_group_self(grp, &ua.sr, &scope); ua.scope = scope; tipc_group_delete(net, grp); tsk->group = NULL; tipc_sk_withdraw(tsk, &ua); return 0; } /** * tipc_setsockopt - set socket option * @sock: socket structure * @lvl: option level * @opt: option identifier * @ov: pointer to new option value * @ol: length of option value * * For stream sockets only, accepts and ignores all IPPROTO_TCP options * (to ease compatibility). * * Return: 0 on success, errno otherwise */ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov, unsigned int ol) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group_req mreq; u32 value = 0; int res = 0; if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) return 0; if (lvl != SOL_TIPC) return -ENOPROTOOPT; switch (opt) { case TIPC_IMPORTANCE: case TIPC_SRC_DROPPABLE: case TIPC_DEST_DROPPABLE: case TIPC_CONN_TIMEOUT: case TIPC_NODELAY: if (ol < sizeof(value)) return -EINVAL; if (copy_from_sockptr(&value, ov, sizeof(u32))) return -EFAULT; break; case TIPC_GROUP_JOIN: if (ol < sizeof(mreq)) return -EINVAL; if (copy_from_sockptr(&mreq, ov, sizeof(mreq))) return -EFAULT; break; default: if (!sockptr_is_null(ov) || ol) return -EINVAL; } lock_sock(sk); switch (opt) { case TIPC_IMPORTANCE: res = tsk_set_importance(sk, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) tsk_set_unreliable(tsk, value); else res = -ENOPROTOOPT; break; case TIPC_DEST_DROPPABLE: tsk_set_unreturnable(tsk, value); break; case TIPC_CONN_TIMEOUT: tipc_sk(sk)->conn_timeout = value; break; case TIPC_MCAST_BROADCAST: tsk->mc_method.rcast = false; tsk->mc_method.mandatory = true; break; case TIPC_MCAST_REPLICAST: tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; break; case TIPC_GROUP_JOIN: res = tipc_sk_join(tsk, &mreq); break; case TIPC_GROUP_LEAVE: res = tipc_sk_leave(tsk); break; case TIPC_NODELAY: tsk->nodelay = !!value; tsk_set_nagle(tsk); break; default: res = -EINVAL; } release_sock(sk); return res; } /** * tipc_getsockopt - get socket option * @sock: socket structure * @lvl: option level * @opt: option identifier * @ov: receptacle for option value * @ol: receptacle for length of option value * * For stream sockets only, returns 0 length result for all IPPROTO_TCP options * (to ease compatibility). 
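 *
 * Hedged user-space sketch (placeholder fd "sd"): a TIPC-level option is
 * written with setsockopt() and read back here with getsockopt():
 *
 *	__u32 tmo = 5000;
 *	socklen_t olen = sizeof(tmo);
 *
 *	setsockopt(sd, SOL_TIPC, TIPC_CONN_TIMEOUT, &tmo, sizeof(tmo));
 *	getsockopt(sd, SOL_TIPC, TIPC_CONN_TIMEOUT, &tmo, &olen);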
* * Return: 0 on success, errno otherwise */ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, int __user *ol) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_service_range seq; int len, scope; u32 value; int res; if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) return put_user(0, ol); if (lvl != SOL_TIPC) return -ENOPROTOOPT; res = get_user(len, ol); if (res) return res; lock_sock(sk); switch (opt) { case TIPC_IMPORTANCE: value = tsk_importance(tsk); break; case TIPC_SRC_DROPPABLE: value = tsk_unreliable(tsk); break; case TIPC_DEST_DROPPABLE: value = tsk_unreturnable(tsk); break; case TIPC_CONN_TIMEOUT: value = tsk->conn_timeout; /* no need to set "res", since already 0 at this point */ break; case TIPC_NODE_RECVQ_DEPTH: value = 0; /* was tipc_queue_size, now obsolete */ break; case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; case TIPC_SOCK_RECVQ_USED: value = sk_rmem_alloc_get(sk); break; case TIPC_GROUP_JOIN: seq.type = 0; if (tsk->group) tipc_group_self(tsk->group, &seq, &scope); value = seq.type; break; default: res = -EINVAL; } release_sock(sk); if (res) return res; /* "get" failed */ if (len < sizeof(value)) return -EINVAL; if (copy_to_user(ov, &value, sizeof(value))) return -EFAULT; return put_user(sizeof(value), ol); } static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct net *net = sock_net(sock->sk); struct tipc_sioc_nodeid_req nr = {0}; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; switch (cmd) { case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; if (!tipc_node_get_linkname(net, lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; return 0; } return -EADDRNOTAVAIL; case SIOCGETNODEID: if (copy_from_user(&nr, argp, sizeof(nr))) return -EFAULT; if (!tipc_node_get_id(net, nr.peer, nr.node_id)) return -EADDRNOTAVAIL; if (copy_to_user(argp, &nr, sizeof(nr))) return -EFAULT; return 0; default: return -ENOIOCTLCMD; } } static int tipc_socketpair(struct socket *sock1, struct socket *sock2) { struct tipc_sock *tsk2 = tipc_sk(sock2->sk); struct tipc_sock *tsk1 = tipc_sk(sock1->sk); u32 onode = tipc_own_addr(sock_net(sock1->sk)); tsk1->peer.family = AF_TIPC; tsk1->peer.addrtype = TIPC_SOCKET_ADDR; tsk1->peer.scope = TIPC_NODE_SCOPE; tsk1->peer.addr.id.ref = tsk2->portid; tsk1->peer.addr.id.node = onode; tsk2->peer.family = AF_TIPC; tsk2->peer.addrtype = TIPC_SOCKET_ADDR; tsk2->peer.scope = TIPC_NODE_SCOPE; tsk2->peer.addr.id.ref = tsk1->portid; tsk2->peer.addr.id.node = onode; tipc_sk_finish_conn(tsk1, tsk2->portid, onode); tipc_sk_finish_conn(tsk2, tsk1->portid, onode); return 0; } /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, .getsockopt = tipc_getsockopt, .sendmsg = tipc_sendmsg, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops packet_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = 
tipc_getname, .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, .getsockopt = tipc_getsockopt, .sendmsg = tipc_send_packet, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops stream_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, .getsockopt = tipc_getsockopt, .sendmsg = tipc_sendstream, .recvmsg = tipc_recvstream, .mmap = sock_no_mmap, }; static const struct net_proto_family tipc_family_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .create = tipc_sk_create }; static struct proto tipc_proto = { .name = "TIPC", .owner = THIS_MODULE, .obj_size = sizeof(struct tipc_sock), .sysctl_rmem = sysctl_tipc_rmem }; /** * tipc_socket_init - initialize TIPC socket interface * * Return: 0 on success, errno otherwise */ int tipc_socket_init(void) { int res; res = proto_register(&tipc_proto, 1); if (res) { pr_err("Failed to register TIPC protocol type\n"); goto out; } res = sock_register(&tipc_family_ops); if (res) { pr_err("Failed to register TIPC socket type\n"); proto_unregister(&tipc_proto); goto out; } out: return res; } /** * tipc_socket_stop - stop TIPC socket interface */ void tipc_socket_stop(void) { sock_unregister(tipc_family_ops.family); proto_unregister(&tipc_proto); } /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) { u32 peer_node, peer_port; u32 conn_type, conn_instance; struct nlattr *nest; peer_node = tsk_peer_node(tsk); peer_port = tsk_peer_port(tsk); conn_type = msg_nametype(&tsk->phdr); conn_instance = msg_nameinst(&tsk->phdr); nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) goto msg_full; if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) goto msg_full; if (tsk->conn_addrtype != 0) { if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) goto msg_full; if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type)) goto msg_full; if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance)) goto msg_full; } nla_nest_end(skb, nest); return 0; msg_full: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock *tsk) { struct net *net = sock_net(skb->sk); struct sock *sk = &tsk->sk; if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) || nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net))) return -EMSGSIZE; if (tipc_sk_connected(sk)) { if (__tipc_nl_add_sk_con(skb, tsk)) return -EMSGSIZE; } else if (!list_empty(&tsk->publications)) { if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) return -EMSGSIZE; } return 0; } /* Caller should hold socket lock for the passed tipc socket. 
*/ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk) { struct nlattr *attrs; void *hdr; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); if (!hdr) goto msg_cancel; attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; if (__tipc_nl_add_sk_info(skb, tsk)) goto attr_msg_cancel; nla_nest_end(skb, attrs); genlmsg_end(skb, hdr); return 0; attr_msg_cancel: nla_nest_cancel(skb, attrs); genlmsg_cancel: genlmsg_cancel(skb, hdr); msg_cancel: return -EMSGSIZE; } int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, int (*skb_handler)(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk)) { struct rhashtable_iter *iter = (void *)cb->args[4]; struct tipc_sock *tsk; int err; rhashtable_walk_start(iter); while ((tsk = rhashtable_walk_next(iter)) != NULL) { if (IS_ERR(tsk)) { if (PTR_ERR(tsk) == -EAGAIN) continue; break; } sock_hold(&tsk->sk); rhashtable_walk_stop(iter); lock_sock(&tsk->sk); err = skb_handler(skb, cb, tsk); if (err) { release_sock(&tsk->sk); sock_put(&tsk->sk); goto out; } release_sock(&tsk->sk); rhashtable_walk_start(iter); sock_put(&tsk->sk); } rhashtable_walk_stop(iter); out: return skb->len; } EXPORT_SYMBOL(tipc_nl_sk_walk); int tipc_dump_start(struct netlink_callback *cb) { return __tipc_dump_start(cb, sock_net(cb->skb->sk)); } EXPORT_SYMBOL(tipc_dump_start); int __tipc_dump_start(struct netlink_callback *cb, struct net *net) { /* tipc_nl_name_table_dump() uses cb->args[0...3]. */ struct rhashtable_iter *iter = (void *)cb->args[4]; struct tipc_net *tn = tipc_net(net); if (!iter) { iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[4] = (long)iter; } rhashtable_walk_enter(&tn->sk_rht, iter); return 0; } int tipc_dump_done(struct netlink_callback *cb) { struct rhashtable_iter *hti = (void *)cb->args[4]; rhashtable_walk_exit(hti); kfree(hti); return 0; } EXPORT_SYMBOL(tipc_dump_done); int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk, u32 sk_filter_state, u64 (*tipc_diag_gen_cookie)(struct sock *sk)) { struct sock *sk = &tsk->sk; struct nlattr *attrs; struct nlattr *stat; /*filter response w.r.t sk_state*/ if (!(sk_filter_state & (1 << sk->sk_state))) return 0; attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto msg_cancel; if (__tipc_nl_add_sk_info(skb, tsk)) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) || nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) || nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || nla_put_u32(skb, TIPC_NLA_SOCK_UID, from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk), sock_i_uid(sk))) || nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, tipc_diag_gen_cookie(sk), TIPC_NLA_SOCK_PAD)) goto attr_msg_cancel; stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT); if (!stat) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ, skb_queue_len(&sk->sk_receive_queue)) || nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, skb_queue_len(&sk->sk_write_queue)) || nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, atomic_read(&sk->sk_drops))) goto stat_msg_cancel; if (tsk->cong_link_cnt && nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG)) goto stat_msg_cancel; if (tsk_conn_cong(tsk) && nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG)) goto stat_msg_cancel; nla_nest_end(skb, stat); if (tsk->group) if (tipc_group_fill_sock_diag(tsk->group, skb)) goto 
stat_msg_cancel; nla_nest_end(skb, attrs); return 0; stat_msg_cancel: nla_nest_cancel(skb, stat); attr_msg_cancel: nla_nest_cancel(skb, attrs); msg_cancel: return -EMSGSIZE; } EXPORT_SYMBOL(tipc_sk_fill_sock_diag); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) { return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk); } /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct netlink_callback *cb, struct publication *publ) { void *hdr; struct nlattr *attrs; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); if (!hdr) goto msg_cancel; attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL); if (!attrs) goto genlmsg_cancel; if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type)) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower)) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper)) goto attr_msg_cancel; nla_nest_end(skb, attrs); genlmsg_end(skb, hdr); return 0; attr_msg_cancel: nla_nest_cancel(skb, attrs); genlmsg_cancel: genlmsg_cancel(skb, hdr); msg_cancel: return -EMSGSIZE; } /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk, u32 *last_publ) { int err; struct publication *p; if (*last_publ) { list_for_each_entry(p, &tsk->publications, binding_sock) { if (p->key == *last_publ) break; } if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent(), * which means that setting prev_seq here will cause the * consistency check to fail in the netlink callback * handler, resulting in the last NLMSG_DONE message * having the NLM_F_DUMP_INTR flag set. 
*/ cb->prev_seq = 1; *last_publ = 0; return -EPIPE; } } else { p = list_first_entry(&tsk->publications, struct publication, binding_sock); } list_for_each_entry_from(p, &tsk->publications, binding_sock) { err = __tipc_nl_add_sk_publ(skb, cb, p); if (err) { *last_publ = p->key; return err; } } *last_publ = 0; return 0; } int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; struct net *net = sock_net(skb->sk); struct tipc_sock *tsk; if (!tsk_portid) { struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], tipc_nl_sock_policy, NULL); if (err) return err; if (!sock[TIPC_NLA_SOCK_REF]) return -EINVAL; tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); } if (done) return 0; tsk = tipc_sk_lookup(net, tsk_portid); if (!tsk) return -EINVAL; lock_sock(&tsk->sk); err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); if (!err) done = 1; release_sock(&tsk->sk); sock_put(&tsk->sk); cb->args[0] = tsk_portid; cb->args[1] = last_publ; cb->args[2] = done; return skb->len; } /** * tipc_sk_filtering - check if a socket should be traced * @sk: the socket to be examined * * @sysctl_tipc_sk_filter is used as the socket tuple for filtering: * (portid, sock type, name type, name lower, name upper) * * Return: true if the socket meets the socket tuple data * (value 0 = 'any') or when there is no tuple set (all = 0), * otherwise false */ bool tipc_sk_filtering(struct sock *sk) { struct tipc_sock *tsk; struct publication *p; u32 _port, _sktype, _type, _lower, _upper; u32 type = 0, lower = 0, upper = 0; if (!sk) return true; tsk = tipc_sk(sk); _port = sysctl_tipc_sk_filter[0]; _sktype = sysctl_tipc_sk_filter[1]; _type = sysctl_tipc_sk_filter[2]; _lower = sysctl_tipc_sk_filter[3]; _upper = sysctl_tipc_sk_filter[4]; if (!_port && !_sktype && !_type && !_lower && !_upper) return true; if (_port) return (_port == tsk->portid); if (_sktype && _sktype != sk->sk_type) return false; if (tsk->published) { p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); if (p) { type = p->sr.type; lower = p->sr.lower; upper = p->sr.upper; } } if (!tipc_sk_type_connectionless(sk)) { type = msg_nametype(&tsk->phdr); lower = msg_nameinst(&tsk->phdr); upper = lower; } if ((_type && _type != type) || (_lower && _lower != lower) || (_upper && _upper != upper)) return false; return true; } u32 tipc_sock_get_portid(struct sock *sk) { return (sk) ? 
(tipc_sk(sk))->portid : 0; } /** * tipc_sk_overlimit1 - check if socket rx queue is about to be overloaded, * both the rcv and backlog queues are considered * @sk: tipc sk to be checked * @skb: tipc msg to be checked * * Return: true if the socket rx queue allocation is > 90%, otherwise false */ bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) { atomic_t *dcnt = &tipc_sk(sk)->dupl_rcvcnt; unsigned int lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); unsigned int qsize = sk->sk_backlog.len + sk_rmem_alloc_get(sk); return (qsize > lim * 90 / 100); } /** * tipc_sk_overlimit2 - check if socket rx queue is about to be overloaded, * only the rcv queue is considered * @sk: tipc sk to be checked * @skb: tipc msg to be checked * * Return: true if the socket rx queue allocation is > 90%, otherwise false */ bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb) { unsigned int lim = rcvbuf_limit(sk, skb); unsigned int qsize = sk_rmem_alloc_get(sk); return (qsize > lim * 90 / 100); } /** * tipc_sk_dump - dump TIPC socket * @sk: tipc sk to be dumped * @dqueues: bitmask to decide if any socket queue to be dumped? * - TIPC_DUMP_NONE: don't dump socket queues * - TIPC_DUMP_SK_SNDQ: dump socket send queue * - TIPC_DUMP_SK_RCVQ: dump socket rcv queue * - TIPC_DUMP_SK_BKLGQ: dump socket backlog queue * - TIPC_DUMP_ALL: dump all the socket queues above * @buf: returned buffer of dump data in format */ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) { int i = 0; size_t sz = (dqueues) ? SK_LMAX : SK_LMIN; u32 conn_type, conn_instance; struct tipc_sock *tsk; struct publication *p; bool tsk_connected; if (!sk) { i += scnprintf(buf, sz, "sk data: (null)\n"); return i; } tsk = tipc_sk(sk); tsk_connected = !tipc_sk_type_connectionless(sk); i += scnprintf(buf, sz, "sk data: %u", sk->sk_type); i += scnprintf(buf + i, sz - i, " %d", sk->sk_state); i += scnprintf(buf + i, sz - i, " %x", tsk_own_node(tsk)); i += scnprintf(buf + i, sz - i, " %u", tsk->portid); i += scnprintf(buf + i, sz - i, " | %u", tsk_connected); if (tsk_connected) { i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk)); i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk)); conn_type = msg_nametype(&tsk->phdr); conn_instance = msg_nameinst(&tsk->phdr); i += scnprintf(buf + i, sz - i, " %u", conn_type); i += scnprintf(buf + i, sz - i, " %u", conn_instance); } i += scnprintf(buf + i, sz - i, " | %u", tsk->published); if (tsk->published) { p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0); i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0); i += scnprintf(buf + i, sz - i, " %u", (p) ? 
p->sr.upper : 0); } i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); i += scnprintf(buf + i, sz - i, " %u", tsk->max_pkt); i += scnprintf(buf + i, sz - i, " %x", tsk->peer_caps); i += scnprintf(buf + i, sz - i, " %u", tsk->cong_link_cnt); i += scnprintf(buf + i, sz - i, " %u", tsk->snt_unacked); i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_unacked); i += scnprintf(buf + i, sz - i, " %u", atomic_read(&tsk->dupl_rcvcnt)); i += scnprintf(buf + i, sz - i, " %u", sk->sk_shutdown); i += scnprintf(buf + i, sz - i, " | %d", sk_wmem_alloc_get(sk)); i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf); i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk)); i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf); i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len)); if (dqueues & TIPC_DUMP_SK_SNDQ) { i += scnprintf(buf + i, sz - i, "sk_write_queue: "); i += tipc_list_dump(&sk->sk_write_queue, false, buf + i); } if (dqueues & TIPC_DUMP_SK_RCVQ) { i += scnprintf(buf + i, sz - i, "sk_receive_queue: "); i += tipc_list_dump(&sk->sk_receive_queue, false, buf + i); } if (dqueues & TIPC_DUMP_SK_BKLGQ) { i += scnprintf(buf + i, sz - i, "sk_backlog:\n head "); i += tipc_skb_dump(sk->sk_backlog.head, false, buf + i); if (sk->sk_backlog.tail != sk->sk_backlog.head) { i += scnprintf(buf + i, sz - i, " tail "); i += tipc_skb_dump(sk->sk_backlog.tail, false, buf + i); } } return i; }
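tipc_setsockopt() above is the user-space entry point for the group API: TIPC_GROUP_JOIN copies a struct tipc_group_req in and hands it to tipc_sk_join(). A minimal user-space sketch of that call, assuming only the uapi definitions in <linux/tipc.h>; the service type and instance values are made up for illustration:

/* Hypothetical user-space sketch: join a TIPC communication group.
 * Maps onto tipc_sk_join() above; fails with EACCES if the socket is
 * already a group member or the type is below TIPC_RESERVED_TYPES.
 */
#include <linux/tipc.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int join_group(void)
{
	struct tipc_group_req req;
	int sd = socket(AF_TIPC, SOCK_RDM, 0);

	if (sd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.type = 4711;		/* illustrative service type (>= TIPC_RESERVED_TYPES) */
	req.instance = 0;		/* this member's instance within the group */
	req.scope = TIPC_CLUSTER_SCOPE;	/* tipc_sk_join() forces cluster scope unless NODE */

	if (setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req)) < 0) {
		perror("TIPC_GROUP_JOIN");
		return -1;
	}
	/* Leave later via setsockopt(sd, SOL_TIPC, TIPC_GROUP_LEAVE, NULL, 0),
	 * which maps onto tipc_sk_leave() above. */
	return sd;
}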
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_gred.c Generic Random Early Detection queue. * * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 * * 991129: - Bug fix with grio mode * - a better single AvgQ mode with Grio (WRED) * - A finer grained VQ dequeue based on suggestion * from Ren Liu * - More error checks * * For all the glorious comments look at include/net/red.h */ #include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> #include <net/red.h> #define GRED_DEF_PRIO (MAX_DPs / 2) #define GRED_VQ_MASK (MAX_DPs - 1) #define GRED_VQ_RED_FLAGS (TC_RED_ECN | TC_RED_HARDDROP) struct gred_sched_data; struct gred_sched; struct gred_sched_data { u32 limit; /* HARD maximal queue length */ u32 DP; /* the drop parameters */ u32 red_flags; /* virtualQ version of red_flags */ u64 bytesin; /* bytes seen on virtualQ so far*/ u32 packetsin; /* packets seen on virtualQ so far*/ u32 backlog; /* bytes on the virtualQ */ u8 prio; /* the prio of this vq */ struct red_parms parms; struct red_vars vars; struct red_stats stats; }; enum { GRED_WRED_MODE = 1, GRED_RIO_MODE, }; struct gred_sched { struct gred_sched_data *tab[MAX_DPs]; unsigned long flags; u32 red_flags; u32 DPs; u32 def; struct red_vars wred_set; struct tc_gred_qopt_offload *opt; }; static inline int gred_wred_mode(struct gred_sched *table) { return test_bit(GRED_WRED_MODE, &table->flags); } static inline void gred_enable_wred_mode(struct gred_sched *table) { __set_bit(GRED_WRED_MODE, &table->flags); } static inline void gred_disable_wred_mode(struct gred_sched *table) { __clear_bit(GRED_WRED_MODE, &table->flags); } static inline int gred_rio_mode(struct gred_sched *table) { return test_bit(GRED_RIO_MODE, &table->flags); } static inline void gred_enable_rio_mode(struct gred_sched *table) { __set_bit(GRED_RIO_MODE, &table->flags); } static inline void gred_disable_rio_mode(struct gred_sched *table) { __clear_bit(GRED_RIO_MODE, &table->flags); } static inline int gred_wred_mode_check(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); int i; /* Really ugly O(n^2) but shouldn't be needed too frequently. 
*/ for (i = 0; i < table->DPs; i++) { struct gred_sched_data *q = table->tab[i]; int n; if (q == NULL) continue; for (n = i + 1; n < table->DPs; n++) if (table->tab[n] && table->tab[n]->prio == q->prio) return 1; } return 0; } static inline unsigned int gred_backlog(struct gred_sched *table, struct gred_sched_data *q, struct Qdisc *sch) { if (gred_wred_mode(table)) return sch->qstats.backlog; else return q->backlog; } static inline u16 tc_index_to_dp(struct sk_buff *skb) { return skb->tc_index & GRED_VQ_MASK; } static inline void gred_load_wred_set(const struct gred_sched *table, struct gred_sched_data *q) { q->vars.qavg = table->wred_set.qavg; q->vars.qidlestart = table->wred_set.qidlestart; } static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { table->wred_set.qavg = q->vars.qavg; table->wred_set.qidlestart = q->vars.qidlestart; } static int gred_use_ecn(struct gred_sched_data *q) { return q->red_flags & TC_RED_ECN; } static int gred_use_harddrop(struct gred_sched_data *q) { return q->red_flags & TC_RED_HARDDROP; } static bool gred_per_vq_red_flags_used(struct gred_sched *table) { unsigned int i; /* Local per-vq flags couldn't have been set unless global are 0 */ if (table->red_flags) return false; for (i = 0; i < MAX_DPs; i++) if (table->tab[i] && table->tab[i]->red_flags) return true; return false; } static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct gred_sched_data *q = NULL; struct gred_sched *t = qdisc_priv(sch); unsigned long qavg = 0; u16 dp = tc_index_to_dp(skb); if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { dp = t->def; q = t->tab[dp]; if (!q) { /* Pass through packets not assigned to a DP * if no default DP has been configured. This * allows for DP flows to be left untouched. */ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; } /* fix tc_index? 
--could be controversial but needed for requeueing */ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } /* sum up all the qaves of prios < ours to get the new qave */ if (!gred_wred_mode(t) && gred_rio_mode(t)) { int i; for (i = 0; i < t->DPs; i++) { if (t->tab[i] && t->tab[i]->prio < q->prio && !red_is_idling(&t->tab[i]->vars)) qavg += t->tab[i]->vars.qavg; } } q->packetsin++; q->bytesin += qdisc_pkt_len(skb); if (gred_wred_mode(t)) gred_load_wred_set(t, q); q->vars.qavg = red_calc_qavg(&q->parms, &q->vars, gred_backlog(t, q, sch)); if (red_is_idling(&q->vars)) red_end_of_idle_period(&q->vars); if (gred_wred_mode(t)) gred_store_wred_set(t, q); switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) { case RED_DONT_MARK: break; case RED_PROB_MARK: qdisc_qstats_overlimit(sch); if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.prob_drop++; goto congestion_drop; } q->stats.prob_mark++; break; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); if (gred_use_harddrop(q) || !gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.forced_drop++; goto congestion_drop; } q->stats.forced_mark++; break; } if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) { q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } q->stats.pdrop++; drop: return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); congestion_drop: qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); return NET_XMIT_CN; } static struct sk_buff *gred_dequeue(struct Qdisc *sch) { struct sk_buff *skb; struct gred_sched *t = qdisc_priv(sch); skb = qdisc_dequeue_head(sch); if (skb) { struct gred_sched_data *q; u16 dp = tc_index_to_dp(skb); if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n", tc_index_to_dp(skb)); } else { q->backlog -= qdisc_pkt_len(skb); if (gred_wred_mode(t)) { if (!sch->qstats.backlog) red_start_of_idle_period(&t->wred_set); } else { if (!q->backlog) red_start_of_idle_period(&q->vars); } } return skb; } return NULL; } static void gred_reset(struct Qdisc *sch) { int i; struct gred_sched *t = qdisc_priv(sch); qdisc_reset_queue(sch); for (i = 0; i < t->DPs; i++) { struct gred_sched_data *q = t->tab[i]; if (!q) continue; red_restart(&q->vars); q->backlog = 0; } } static void gred_offload(struct Qdisc *sch, enum tc_gred_command command) { struct gred_sched *table = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_gred_qopt_offload *opt = table->opt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; memset(opt, 0, sizeof(*opt)); opt->command = command; opt->handle = sch->handle; opt->parent = sch->parent; if (command == TC_GRED_REPLACE) { unsigned int i; opt->set.grio_on = gred_rio_mode(table); opt->set.wred_on = gred_wred_mode(table); opt->set.dp_cnt = table->DPs; opt->set.dp_def = table->def; for (i = 0; i < table->DPs; i++) { struct gred_sched_data *q = table->tab[i]; if (!q) continue; opt->set.tab[i].present = true; opt->set.tab[i].limit = q->limit; opt->set.tab[i].prio = q->prio; opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; opt->set.tab[i].is_ecn = gred_use_ecn(q); opt->set.tab[i].is_harddrop = gred_use_harddrop(q); opt->set.tab[i].probability = q->parms.max_P; opt->set.tab[i].backlog = &q->backlog; } opt->set.qstats = &sch->qstats; } dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt); } static int gred_offload_dump_stats(struct Qdisc *sch) { 
struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt_offload *hw_stats; u64 bytes = 0, packets = 0; unsigned int i; int ret; hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL); if (!hw_stats) return -ENOMEM; hw_stats->command = TC_GRED_STATS; hw_stats->handle = sch->handle; hw_stats->parent = sch->parent; for (i = 0; i < MAX_DPs; i++) { gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); if (table->tab[i]) hw_stats->stats.xstats[i] = &table->tab[i]->stats; } ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); /* Even if driver returns failure adjust the stats - in case offload * ended but driver still wants to adjust the values. */ sch_tree_lock(sch); for (i = 0; i < MAX_DPs; i++) { if (!table->tab[i]) continue; table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; sch->qstats.drops += hw_stats->stats.qstats[i].drops; sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; } _bstats_update(&sch->bstats, bytes, packets); sch_tree_unlock(sch); kfree(hw_stats); return ret; } static inline void gred_destroy_vq(struct gred_sched_data *q) { kfree(q); } static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_sopt *sopt; bool red_flags_changed; int i; if (!dps) return -EINVAL; sopt = nla_data(dps); if (sopt->DPs > MAX_DPs) { NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high"); return -EINVAL; } if (sopt->DPs == 0) { NL_SET_ERR_MSG_MOD(extack, "number of virtual queues can't be 0"); return -EINVAL; } if (sopt->def_DP >= sopt->DPs) { NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count"); return -EINVAL; } if (sopt->flags && gred_per_vq_red_flags_used(table)) { NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used"); return -EINVAL; } sch_tree_lock(sch); table->DPs = sopt->DPs; table->def = sopt->def_DP; red_flags_changed = table->red_flags != sopt->flags; table->red_flags = sopt->flags; /* * Every entry point to GRED is synchronized with the above code * and the DP is checked against DPs, i.e. shadowed VQs can no * longer be found so we can unlock right here. 
*/ sch_tree_unlock(sch); if (sopt->grio) { gred_enable_rio_mode(table); gred_disable_wred_mode(table); if (gred_wred_mode_check(sch)) gred_enable_wred_mode(table); } else { gred_disable_rio_mode(table); gred_disable_wred_mode(table); } if (red_flags_changed) for (i = 0; i < table->DPs; i++) if (table->tab[i]) table->tab[i]->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } } gred_offload(sch, TC_GRED_REPLACE); return 0; } static inline int gred_change_vq(struct Qdisc *sch, int dp, struct tc_gred_qopt *ctl, int prio, u8 *stab, u32 max_P, struct gred_sched_data **prealloc, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } if (!q) { table->tab[dp] = q = *prealloc; *prealloc = NULL; if (!q) return -ENOMEM; q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; } q->DP = dp; q->prio = prio; if (ctl->limit > sch->limit) q->limit = sch->limit; else q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, stab, max_P); red_set_vars(&q->vars); return 0; } static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = { [TCA_GRED_VQ_DP] = { .type = NLA_U32 }, [TCA_GRED_VQ_FLAGS] = { .type = NLA_U32 }, }; static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = { [TCA_GRED_VQ_ENTRY] = { .type = NLA_NESTED }, }; static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, [TCA_GRED_LIMIT] = { .type = NLA_U32 }, [TCA_GRED_VQ_LIST] = { .type = NLA_NESTED }, }; static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) { struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; u32 dp; nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); if (tb[TCA_GRED_VQ_FLAGS]) table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); } static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs) { const struct nlattr *attr; int rem; nla_for_each_nested(attr, vqs, rem) { switch (nla_type(attr)) { case TCA_GRED_VQ_ENTRY: gred_vq_apply(table, attr); break; } } } static int gred_vq_validate(struct gred_sched *table, u32 cdp, const struct nlattr *entry, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; int err; u32 dp; err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, extack); if (err < 0) return err; if (!tb[TCA_GRED_VQ_DP]) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified"); return -EINVAL; } dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); if (dp >= table->DPs) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds"); return -EINVAL; } if (dp != cdp && !table->tab[dp]) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated"); return -EINVAL; } if (tb[TCA_GRED_VQ_FLAGS]) { u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); if (table->red_flags && table->red_flags != red_flags) { NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual 
queue RED flags when per-Qdisc flags are used"); return -EINVAL; } if (red_flags & ~GRED_VQ_RED_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "invalid RED flags specified"); return -EINVAL; } } return 0; } static int gred_vqs_validate(struct gred_sched *table, u32 cdp, struct nlattr *vqs, struct netlink_ext_ack *extack) { const struct nlattr *attr; int rem, err; err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX, gred_vqe_policy, extack); if (err < 0) return err; nla_for_each_nested(attr, vqs, rem) { switch (nla_type(attr)) { case TCA_GRED_VQ_ENTRY: err = gred_vq_validate(table, cdp, attr, extack); if (err) return err; break; default: NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes"); return -EINVAL; } } if (rem > 0) { NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list"); return -EINVAL; } return 0; } static int gred_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt *ctl; struct nlattr *tb[TCA_GRED_MAX + 1]; int err, prio = GRED_DEF_PRIO; u8 *stab; u32 max_P; struct gred_sched_data *prealloc; err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } if (tb[TCA_GRED_PARMS] == NULL || tb[TCA_GRED_STAB] == NULL || tb[TCA_GRED_LIMIT] != NULL) { NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time"); return -EINVAL; } max_P = nla_get_u32_default(tb[TCA_GRED_MAX_P], 0); ctl = nla_data(tb[TCA_GRED_PARMS]); stab = nla_data(tb[TCA_GRED_STAB]); if (ctl->DP >= table->DPs) { NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count"); return -EINVAL; } if (tb[TCA_GRED_VQ_LIST]) { err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST], extack); if (err) return err; } if (gred_rio_mode(table)) { if (ctl->prio == 0) { int def_prio = GRED_DEF_PRIO; if (table->tab[table->def]) def_prio = table->tab[table->def]->prio; printk(KERN_DEBUG "GRED: DP %u does not have a prio " "setting default to %d\n", ctl->DP, def_prio); prio = def_prio; } else prio = ctl->prio; } prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); sch_tree_lock(sch); err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc, extack); if (err < 0) goto err_unlock_free; if (tb[TCA_GRED_VQ_LIST]) gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]); if (gred_rio_mode(table)) { gred_disable_wred_mode(table); if (gred_wred_mode_check(sch)) gred_enable_wred_mode(table); } sch_tree_unlock(sch); kfree(prealloc); gred_offload(sch, TC_GRED_REPLACE); return 0; err_unlock_free: sch_tree_unlock(sch); kfree(prealloc); return err; } static int gred_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct nlattr *tb[TCA_GRED_MAX + 1]; int err; if (!opt) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) { NL_SET_ERR_MSG_MOD(extack, "virtual queue configuration can't be specified at initialization time"); return -EINVAL; } if (tb[TCA_GRED_LIMIT]) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); else sch->limit = qdisc_dev(sch)->tx_queue_len * psched_mtu(qdisc_dev(sch)); if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) { table->opt = 
kzalloc(sizeof(*table->opt), GFP_KERNEL); if (!table->opt) return -ENOMEM; } return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) { struct gred_sched *table = qdisc_priv(sch); struct nlattr *parms, *vqs, *opts = NULL; int i; u32 max_p[MAX_DPs]; struct tc_gred_sopt sopt = { .DPs = table->DPs, .def_DP = table->def, .grio = gred_rio_mode(table), .flags = table->red_flags, }; if (gred_offload_dump_stats(sch)) goto nla_put_failure; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; max_p[i] = q ? q->parms.max_P : 0; } if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) goto nla_put_failure; /* Old style all-in-one dump of VQs */ parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct tc_gred_qopt opt; unsigned long qavg; memset(&opt, 0, sizeof(opt)); if (!q) { /* hack -- fix at some point with proper message This is how we indicate to tc that there is no VQ at this DP */ opt.DP = MAX_DPs + i; goto append_opt; } opt.limit = q->limit; opt.DP = q->DP; opt.backlog = gred_backlog(table, q, sch); opt.prio = q->prio; opt.qth_min = q->parms.qth_min >> q->parms.Wlog; opt.qth_max = q->parms.qth_max >> q->parms.Wlog; opt.Wlog = q->parms.Wlog; opt.Plog = q->parms.Plog; opt.Scell_log = q->parms.Scell_log; opt.early = q->stats.prob_drop; opt.forced = q->stats.forced_drop; opt.pdrop = q->stats.pdrop; opt.packets = q->packetsin; opt.bytesin = q->bytesin; if (gred_wred_mode(table)) gred_load_wred_set(table, q); qavg = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg >> q->parms.Wlog); opt.qave = qavg >> q->parms.Wlog; append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) goto nla_put_failure; } nla_nest_end(skb, parms); /* Dump the VQs again, in more structured way */ vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST); if (!vqs) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct nlattr *vq; if (!q) continue; vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY); if (!vq) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags)) goto nla_put_failure; /* Stats */ if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin, TCA_GRED_VQ_PAD)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG, gred_backlog(table, q, sch))) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP, q->stats.prob_drop)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK, q->stats.prob_mark)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP, q->stats.forced_drop)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK, q->stats.forced_mark)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop)) goto nla_put_failure; nla_nest_end(skb, vq); } nla_nest_end(skb, vqs); return nla_nest_end(skb, opts); nla_put_failure: nla_nest_cancel(skb, opts); return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); int i; 
for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .id = "gred", .priv_size = sizeof(struct gred_sched), .enqueue = gred_enqueue, .dequeue = gred_dequeue, .peek = qdisc_peek_head, .init = gred_init, .reset = gred_reset, .destroy = gred_destroy, .change = gred_change, .dump = gred_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("gred"); static int __init gred_module_init(void) { return register_qdisc(&gred_qdisc_ops); } static void __exit gred_module_exit(void) { unregister_qdisc(&gred_qdisc_ops); } module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic Random Early Detection qdisc");
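gred_enqueue() above picks the virtual queue purely from the low bits of skb->tc_index, falling back to the default DP when the index is out of range or the VQ is unconfigured. A standalone sketch of that selection rule, assuming MAX_DPs == 16 (its uapi value); select_dp() is a made-up name for illustration:

/* Standalone illustration of GRED's virtual-queue selection, mirroring
 * tc_index_to_dp() and the default-DP fallback in gred_enqueue() above.
 */
#include <stdio.h>

#define MAX_DPs		16		/* from include/uapi/linux/pkt_sched.h */
#define GRED_VQ_MASK	(MAX_DPs - 1)

static unsigned int select_dp(unsigned short tc_index, unsigned int dps,
			      unsigned int def, const int *configured)
{
	unsigned int dp = tc_index & GRED_VQ_MASK;	/* tc_index_to_dp() */

	/* Out-of-range or unconfigured DPs fall back to the default VQ,
	 * exactly as in gred_enqueue(). */
	if (dp >= dps || !configured[dp])
		dp = def;
	return dp;
}

int main(void)
{
	int configured[MAX_DPs] = { [0] = 1, [1] = 1, [2] = 1 };

	printf("%u\n", select_dp(0x0001, 3, 0, configured)); /* -> 1 */
	printf("%u\n", select_dp(0x0007, 3, 0, configured)); /* -> 0, fallback */
	return 0;
}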
#ifndef __DRM_VMA_MANAGER_H__ #define __DRM_VMA_MANAGER_H__ /* * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <drm/drm_mm.h> #include <linux/mm.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/types.h> /* We make up offsets for buffer objects so we can recognize them at * mmap time. 
pgoff in mmap is an unsigned long, so we need to make sure * that the faked up offset will fit */ #if BITS_PER_LONG == 64 #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 256) #else #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16) #endif struct drm_file; struct drm_vma_offset_file { struct rb_node vm_rb; struct drm_file *vm_tag; unsigned long vm_count; }; struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; struct rb_root vm_files; void *driver_private; }; struct drm_vma_offset_manager { rwlock_t vm_lock; struct drm_mm vm_addr_space_mm; }; void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr, unsigned long page_offset, unsigned long size); void drm_vma_offset_manager_destroy(struct drm_vma_offset_manager *mgr); struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_manager *mgr, unsigned long start, unsigned long pages); int drm_vma_offset_add(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node, unsigned long pages); void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node); int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag); int drm_vma_node_allow_once(struct drm_vma_offset_node *node, struct drm_file *tag); void drm_vma_node_revoke(struct drm_vma_offset_node *node, struct drm_file *tag); bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node, struct drm_file *tag); /** * drm_vma_offset_exact_lookup_locked() - Look up node by exact address * @mgr: Manager object * @start: Start address (page-based, not byte-based) * @pages: Size of object (page-based) * * Same as drm_vma_offset_lookup_locked() but does not allow any offset into the node. * It only returns the exact object with the given start address. * * RETURNS: * Node at exact start address @start. */ static inline struct drm_vma_offset_node * drm_vma_offset_exact_lookup_locked(struct drm_vma_offset_manager *mgr, unsigned long start, unsigned long pages) { struct drm_vma_offset_node *node; node = drm_vma_offset_lookup_locked(mgr, start, pages); return (node && node->vm_node.start == start) ? node : NULL; } /** * drm_vma_offset_lock_lookup() - Lock lookup for extended private use * @mgr: Manager object * * Lock VMA manager for extended lookups. Only locked VMA function calls * are allowed while holding this lock. All other contexts are blocked from VMA * until the lock is released via drm_vma_offset_unlock_lookup(). * * Use this if you need to take a reference to the objects returned by * drm_vma_offset_lookup_locked() before releasing this lock again. * * This lock must not be used for anything else than extended lookups. You must * not call any other VMA helpers while holding this lock. * * Note: You're in atomic-context while holding this lock! */ static inline void drm_vma_offset_lock_lookup(struct drm_vma_offset_manager *mgr) { read_lock(&mgr->vm_lock); } /** * drm_vma_offset_unlock_lookup() - Unlock lookup for extended private use * @mgr: Manager object * * Release lookup-lock. See drm_vma_offset_lock_lookup() for more information. */ static inline void drm_vma_offset_unlock_lookup(struct drm_vma_offset_manager *mgr) { read_unlock(&mgr->vm_lock); } /** * drm_vma_node_reset() - Initialize or reset node object * @node: Node to initialize or reset * * Reset a node to its initial state. 
This must be called before using it with * any VMA offset manager. * * This must not be called on an already allocated node, or you will leak * memory. */ static inline void drm_vma_node_reset(struct drm_vma_offset_node *node) { memset(node, 0, sizeof(*node)); node->vm_files = RB_ROOT; rwlock_init(&node->vm_lock); } /** * drm_vma_node_start() - Return start address for page-based addressing * @node: Node to inspect * * Return the start address of the given node. This can be used as offset into * the linear VM space that is provided by the VMA offset manager. Note that * this can only be used for page-based addressing. If you need a proper offset * for user-space mappings, you must apply "<< PAGE_SHIFT" or use the * drm_vma_node_offset_addr() helper instead. * * RETURNS: * Start address of @node for page-based addressing. 0 if the node does not * have an offset allocated. */ static inline unsigned long drm_vma_node_start(const struct drm_vma_offset_node *node) { return node->vm_node.start; } /** * drm_vma_node_size() - Return size (page-based) * @node: Node to inspect * * Return the size as number of pages for the given node. This is the same size * that was passed to drm_vma_offset_add(). If no offset is allocated for the * node, this is 0. * * RETURNS: * Size of @node as number of pages. 0 if the node does not have an offset * allocated. */ static inline unsigned long drm_vma_node_size(struct drm_vma_offset_node *node) { return node->vm_node.size; } /** * drm_vma_node_offset_addr() - Return sanitized offset for user-space mmaps * @node: Linked offset node * * Same as drm_vma_node_start() but returns the address as a valid offset that * can be used for user-space mappings during mmap(). * This must not be called on unlinked nodes. * * RETURNS: * Offset of @node for byte-based addressing. 0 if the node does not have an * object allocated. */ static inline __u64 drm_vma_node_offset_addr(struct drm_vma_offset_node *node) { return ((__u64)node->vm_node.start) << PAGE_SHIFT; } /** * drm_vma_node_unmap() - Unmap offset node * @node: Offset node * @file_mapping: Address space to unmap @node from * * Unmap all userspace mappings for a given offset node. The mappings must be * associated with the @file_mapping address-space. If no offset exists * nothing is done. * * This call is unlocked. The caller must guarantee that drm_vma_offset_remove() * is not called on this node concurrently. */ static inline void drm_vma_node_unmap(struct drm_vma_offset_node *node, struct address_space *file_mapping) { if (drm_mm_node_allocated(&node->vm_node)) unmap_mapping_range(file_mapping, drm_vma_node_offset_addr(node), drm_vma_node_size(node) << PAGE_SHIFT, 1); } /** * drm_vma_node_verify_access() - Access verification helper for TTM * @node: Offset node * @tag: Tag of file to check * * This checks whether @tag is granted access to @node. It is the same as * drm_vma_node_is_allowed() but suitable as drop-in helper for TTM * verify_access() callbacks. * * RETURNS: * 0 if access is granted, -EACCES otherwise. */ static inline int drm_vma_node_verify_access(struct drm_vma_offset_node *node, struct drm_file *tag) { return drm_vma_node_is_allowed(node, tag) ? 0 : -EACCES; } #endif /* __DRM_VMA_MANAGER_H__ */
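A sketch of how a driver might wire these helpers together when exposing a buffer object through mmap(): one manager per device, one node per object, and drm_vma_node_offset_addr() gives the fake byte offset handed back to user space. my_dev_init(), my_obj_mmap_offset() and n_pages are hypothetical names; error handling is trimmed.

/* Illustrative driver-side use of the VMA offset manager declared above. */
#include <drm/drm_vma_manager.h>

static struct drm_vma_offset_manager mgr;

/* Called once at device setup: claim the linear fake-offset space. */
static void my_dev_init(void)
{
	drm_vma_offset_manager_init(&mgr, DRM_FILE_PAGE_OFFSET_START,
				    DRM_FILE_PAGE_OFFSET_SIZE);
}

/* Allocate an offset for an object of n_pages pages and return the byte
 * offset user space must pass to mmap() on the DRM fd. */
static __u64 my_obj_mmap_offset(struct drm_vma_offset_node *node,
				unsigned long n_pages)
{
	drm_vma_node_reset(node);	/* required before first use, see above */
	if (drm_vma_offset_add(&mgr, node, n_pages))
		return 0;
	return drm_vma_node_offset_addr(node);
}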
// SPDX-License-Identifier: GPL-2.0+ /* * HID driver for gaming keys on Razer Blackwidow gaming keyboards * Macro Key Keycodes: M1 = 191, M2 = 192, M3 = 193, M4 = 194, M5 = 195 * * Copyright (c) 2021 Jelle van der Waa <jvanderwaa@redhat.com> */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/usb.h> #include <linux/wait.h> #include "hid-ids.h" #define map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) #define RAZER_BLACKWIDOW_TRANSFER_BUF_SIZE 91 static bool macro_key_remapping = 1; module_param(macro_key_remapping, bool, 0644); MODULE_PARM_DESC(macro_key_remapping, " on (Y) off (N)"); static unsigned char blackwidow_init[RAZER_BLACKWIDOW_TRANSFER_BUF_SIZE] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00 }; static int razer_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if (!macro_key_remapping) return 0; if ((usage->hid & HID_UP_KEYBOARD) != HID_UP_KEYBOARD) return 0; switch (usage->hid & ~HID_UP_KEYBOARD) { case 0x68: map_key_clear(KEY_MACRO1); return 1; case 0x69: map_key_clear(KEY_MACRO2); return 1; case 0x6a: map_key_clear(KEY_MACRO3); return 1; case 0x6b: map_key_clear(KEY_MACRO4); return 1; case 0x6c: map_key_clear(KEY_MACRO5); return 1; } return 0; } static int razer_probe(struct hid_device *hdev, const struct hid_device_id *id) { char *buf; int ret = 0; ret = hid_parse(hdev); if (ret) return ret; /* * Only send the enable macro keys command for the third device * identified as mouse input. 
*/ if (hdev->type == HID_TYPE_USBMOUSE) { buf = kmemdup(blackwidow_init, RAZER_BLACKWIDOW_TRANSFER_BUF_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; ret = hid_hw_raw_request(hdev, 0, buf, RAZER_BLACKWIDOW_TRANSFER_BUF_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret != RAZER_BLACKWIDOW_TRANSFER_BUF_SIZE) hid_err(hdev, "failed to enable macro keys: %d\n", ret); kfree(buf); } return hid_hw_start(hdev, HID_CONNECT_DEFAULT); } static const struct hid_device_id razer_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLACKWIDOW) }, { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLACKWIDOW_CLASSIC) }, { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLACKWIDOW_ULTIMATE) }, { } }; MODULE_DEVICE_TABLE(hid, razer_devices); static struct hid_driver razer_driver = { .name = "razer", .id_table = razer_devices, .input_mapping = razer_input_mapping, .probe = razer_probe, }; module_hid_driver(razer_driver); MODULE_AUTHOR("Jelle van der Waa <jvanderwaa@redhat.com>"); MODULE_DESCRIPTION("HID driver for gaming keys on Razer Blackwidow gaming keyboards"); MODULE_LICENSE("GPL");
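Once razer_input_mapping() has remapped usages 0x68..0x6c, the macro keys arrive in user space as ordinary KEY_MACRO1..KEY_MACRO5 events on the keyboard's evdev node. A hedged user-space sketch that watches for M1 presses; the event-node path is a placeholder (find the real one via evtest or udev):

/* User-space sketch: report presses of the M1 key remapped by this driver.
 * /dev/input/event5 is a placeholder device path.
 */
#include <linux/input.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct input_event ev;
	int fd = open("/dev/input/event5", O_RDONLY);

	if (fd < 0)
		return 1;
	while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
		/* value 1 = key down; KEY_MACRO1 is what usage 0x68 maps to */
		if (ev.type == EV_KEY && ev.code == KEY_MACRO1 && ev.value == 1)
			printf("M1 pressed\n");
	}
	close(fd);
	return 0;
}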
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * tcp_diag.c	Module for monitoring TCP transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

#include <linux/tcp.h>

#include <net/netlink.h>
#include <net/tcp.h>

static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			      void *_info)
{
	struct tcp_info *info = _info;

	if (inet_sk_state_load(sk) == TCP_LISTEN) {
		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
	} else if (sk->sk_type == SOCK_STREAM) {
		const struct tcp_sock *tp = tcp_sk(sk);

		r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
					     READ_ONCE(tp->copied_seq), 0);
		r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
	}
	if (info)
		tcp_get_info(sk, info);
}

#ifdef CONFIG_TCP_MD5SIG
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
				 const struct tcp_md5sig_key *key)
{
	info->tcpm_family = key->family;
	info->tcpm_prefixlen = key->prefixlen;
	info->tcpm_keylen = key->keylen;
	memcpy(info->tcpm_key, key->key, key->keylen);

	if (key->family == AF_INET)
		info->tcpm_addr[0] = key->addr.a4.s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	else if (key->family == AF_INET6)
		memcpy(&info->tcpm_addr, &key->addr.a6,
		       sizeof(info->tcpm_addr));
#endif
}

static int tcp_diag_put_md5sig(struct sk_buff *skb,
			       const struct tcp_md5sig_info *md5sig)
{
	const struct tcp_md5sig_key *key;
	struct tcp_diag_md5sig *info;
	struct nlattr *attr;
	int md5sig_count = 0;

	hlist_for_each_entry_rcu(key, &md5sig->head, node)
		md5sig_count++;
	if (md5sig_count == 0)
		return 0;

	attr = nla_reserve(skb, INET_DIAG_MD5SIG,
			   md5sig_count * sizeof(struct tcp_diag_md5sig));
	if (!attr)
		return -EMSGSIZE;

	info = nla_data(attr);
	memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		tcp_diag_md5sig_fill(info++, key);
		if (--md5sig_count == 0)
			break;
	}

	return 0;
}
#endif

static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
			    const struct tcp_ulp_ops *ulp_ops)
{
	struct nlattr *nest;
	int err;

	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
	if (!nest)
		return -EMSGSIZE;

	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
	if (err)
		goto nla_failure;

	if (ulp_ops->get_info)
		err = ulp_ops->get_info(sk, skb);
	if (err)
		goto nla_failure;

	nla_nest_end(skb, nest);
	return 0;

nla_failure:
	nla_nest_cancel(skb, nest);
	return err;
}

static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
			    struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int err = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin) {
		struct tcp_md5sig_info *md5sig;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig)
			err = tcp_diag_put_md5sig(skb, md5sig);
		rcu_read_unlock();
		if (err < 0)
			return err;
	}
#endif

	if (net_admin) {
		const struct tcp_ulp_ops *ulp_ops;

		ulp_ops = icsk->icsk_ulp_ops;
		if (ulp_ops)
			err = tcp_diag_put_ulp(skb, sk, ulp_ops);
		if (err)
			return err;
	}
	return 0;
}

static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	size_t size = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin && sk_fullsock(sk)) {
		const struct tcp_md5sig_info *md5sig;
		const struct tcp_md5sig_key *key;
		size_t md5sig_count = 0;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig) {
			hlist_for_each_entry_rcu(key, &md5sig->head, node)
				md5sig_count++;
		}
		rcu_read_unlock();
		size += nla_total_size(md5sig_count *
				       sizeof(struct tcp_diag_md5sig));
	}
#endif

	if (net_admin && sk_fullsock(sk)) {
		const struct tcp_ulp_ops *ulp_ops;

		ulp_ops = icsk->icsk_ulp_ops;
		if (ulp_ops) {
			size += nla_total_size(0) +
				nla_total_size(TCP_ULP_NAME_MAX);
			if (ulp_ops->get_info_size)
				size += ulp_ops->get_info_size(sk);
		}
	}
	return size;
}

static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  const struct inet_diag_req_v2 *r)
{
	struct inet_hashinfo *hinfo;

	hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;

	inet_diag_dump_icsk(hinfo, skb, cb, r);
}

static int tcp_diag_dump_one(struct netlink_callback *cb,
			     const struct inet_diag_req_v2 *req)
{
	struct inet_hashinfo *hinfo;

	hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;

	return inet_diag_dump_one_icsk(hinfo, cb, req);
}

#ifdef CONFIG_INET_DIAG_DESTROY
static int tcp_diag_destroy(struct sk_buff *in_skb,
			    const struct inet_diag_req_v2 *req)
{
	struct net *net = sock_net(in_skb->sk);
	struct inet_hashinfo *hinfo;
	struct sock *sk;
	int err;

	hinfo = net->ipv4.tcp_death_row.hashinfo;
	sk = inet_diag_find_one_icsk(net, hinfo, req);

	if (IS_ERR(sk))
		return PTR_ERR(sk);

	err = sock_diag_destroy(sk, ECONNABORTED);

	sock_gen_put(sk);

	return err;
}
#endif

static const struct inet_diag_handler tcp_diag_handler = {
	.owner			= THIS_MODULE,
	.dump			= tcp_diag_dump,
	.dump_one		= tcp_diag_dump_one,
	.idiag_get_info		= tcp_diag_get_info,
	.idiag_get_aux		= tcp_diag_get_aux,
	.idiag_get_aux_size	= tcp_diag_get_aux_size,
	.idiag_type		= IPPROTO_TCP,
	.idiag_info_size	= sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy		= tcp_diag_destroy,
#endif
};

static int __init tcp_diag_init(void)
{
	return inet_diag_register(&tcp_diag_handler);
}

static void __exit tcp_diag_exit(void)
{
	inet_diag_unregister(&tcp_diag_handler);
}

module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
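The handler registered above answers NETLINK_SOCK_DIAG requests. A minimal user-space sketch of such a query, dumping all IPv4 TCP sockets (error handling trimmed; the 8 KiB receive buffer is an arbitrary choice):

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>
#include <netinet/in.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg = {
		.nlh = {
			.nlmsg_len = sizeof(msg),
			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = AF_INET,
			.sdiag_protocol = IPPROTO_TCP,
			.idiag_states = (unsigned int)-1, /* all TCP states */
		},
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	char buf[8192];
	ssize_t len;

	if (fd < 0 || send(fd, &msg, sizeof(msg), 0) < 0)
		return 1;

	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct nlmsghdr *h = (struct nlmsghdr *)buf;

		for (; NLMSG_OK(h, len); h = NLMSG_NEXT(h, len)) {
			struct inet_diag_msg *r = NLMSG_DATA(h);

			if (h->nlmsg_type == NLMSG_DONE)
				goto out;
			/* rqueue/wqueue are what tcp_diag_get_info filled in */
			printf("sport %u state %u rqueue %u wqueue %u\n",
			       ntohs(r->id.idiag_sport), r->idiag_state,
			       r->idiag_rqueue, r->idiag_wqueue);
		}
	}
out:
	close(fd);
	return 0;
}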
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CAIA Delay-Gradient (CDG) congestion control
 *
 * This implementation is based on the paper:
 *   D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
 *   delay gradients." In IFIP Networking, pages 328-341. Springer, 2011.
 *
 * Scavenger traffic (Less-than-Best-Effort) should disable coexistence
 * heuristics using parameters use_shadow=0 and use_ineff=0.
 *
 * Parameters window, backoff_beta, and backoff_factor are crucial for
 * throughput and delay. Future work is needed to determine better defaults,
 * and to provide guidelines for use in different environments/contexts.
 *
 * Except for window, knobs are configured via /sys/module/tcp_cdg/parameters/.
 * Parameter window is only configurable when loading tcp_cdg as a module.
 *
 * Notable differences from paper/FreeBSD:
 *   o Using Hybrid Slow start and Proportional Rate Reduction.
 *   o Add toggle for shadow window mechanism. Suggested by David Hayes.
 *   o Add toggle for non-congestion loss tolerance.
 *   o Scaling parameter G is changed to a backoff factor;
 *     conversion is given by: backoff_factor = 1000/(G * window).
 *   o Limit shadow window to 2 * cwnd, or to cwnd when application limited.
 *   o More accurate e^-x.
 */
#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/sched/clock.h>
#include <net/tcp.h>

#define HYSTART_ACK_TRAIN	1
#define HYSTART_DELAY		2

static int window __read_mostly = 8;
static unsigned int backoff_beta __read_mostly = 0.7071 * 1024; /* sqrt 0.5 */
static unsigned int backoff_factor __read_mostly = 42;
static unsigned int hystart_detect __read_mostly = 3;
static unsigned int use_ineff __read_mostly = 5;
static bool use_shadow __read_mostly = true;
static bool use_tolerance __read_mostly;

module_param(window, int, 0444);
MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)");
module_param(backoff_beta, uint, 0644);
MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)");
module_param(backoff_factor, uint, 0644);
MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor");
module_param(hystart_detect, uint, 0644);
MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start "
		 "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)");
module_param(use_ineff, uint, 0644);
MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)");
module_param(use_shadow, bool, 0644);
MODULE_PARM_DESC(use_shadow, "use shadow window heuristic");
module_param(use_tolerance, bool, 0644);
MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic");

struct cdg_minmax {
	union {
		struct {
			s32 min;
			s32 max;
		};
		u64 v64;
	};
};

enum cdg_state {
	CDG_UNKNOWN = 0,
	CDG_NONFULL = 1,
	CDG_FULL    = 2,
	CDG_BACKOFF = 3,
};

struct cdg {
	struct cdg_minmax rtt;
	struct cdg_minmax rtt_prev;
	struct cdg_minmax *gradients;
	struct cdg_minmax gsum;
	bool gfilled;
	u8  tail;
	u8  state;
	u8  delack;
	u32 rtt_seq;
	u32 shadow_wnd;
	u16 backoff_cnt;
	u16 sample_cnt;
	s32 delay_min;
	u32 last_ack;
	u32 round_start;
};

/**
 * nexp_u32 - negative base-e exponential
 * @ux: x in units of micro
 *
 * Returns exp(ux * -1e-6) * U32_MAX.
 */
static u32 __pure nexp_u32(u32 ux)
{
	static const u16 v[] = {
		/* exp(-x)*65536-1 for x = 0, 0.000256, 0.000512, ... */
		65535,
		65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422,
		61378, 57484, 50423, 38795, 22965, 8047,  987,   14,
	};
	u32 msb = ux >> 8;
	u32 res;
	int i;

	/* Cut off when ux >= 2^24 (actual result is <= 222/U32_MAX). */
	if (msb > U16_MAX)
		return 0;

	/* Scale first eight bits linearly: */
	res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000);

	/* Obtain e^(x + y + ...) by computing e^x * e^y * ...: */
	for (i = 1; msb; i++, msb >>= 1) {
		u32 y = v[i & -(msb & 1)] + U32_C(1);

		res = ((u64)res * y) >> 16;
	}

	return res;
}

/* Based on the HyStart algorithm (by Ha et al.) that is implemented in
 * tcp_cubic. Differences/experimental changes:
 *   o Using Hayes' delayed ACK filter.
 *   o Using a usec clock for the ACK train.
 *   o Reset ACK train when application limited.
 *   o Invoked at any cwnd (i.e. also when cwnd < 16).
 *   o Invoked only when cwnd < ssthresh (i.e. not when cwnd == ssthresh).
 */
static void tcp_cdg_hystart_update(struct sock *sk)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	ca->delay_min = min_not_zero(ca->delay_min, ca->rtt.min);
	if (ca->delay_min == 0)
		return;

	if (hystart_detect & HYSTART_ACK_TRAIN) {
		u32 now_us = tp->tcp_mstamp;

		if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) {
			ca->last_ack = now_us;
			ca->round_start = now_us;
		} else if (before(now_us, ca->last_ack + 3000)) {
			u32 base_owd = max(ca->delay_min / 2U, 125U);

			ca->last_ack = now_us;
			if (after(now_us, ca->round_start + base_owd)) {
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPHYSTARTTRAINDETECT);
				NET_ADD_STATS(sock_net(sk),
					      LINUX_MIB_TCPHYSTARTTRAINCWND,
					      tcp_snd_cwnd(tp));
				tp->snd_ssthresh = tcp_snd_cwnd(tp);
				return;
			}
		}
	}

	if (hystart_detect & HYSTART_DELAY) {
		if (ca->sample_cnt < 8) {
			ca->sample_cnt++;
		} else {
			s32 thresh = max(ca->delay_min + ca->delay_min / 8U,
					 125U);

			if (ca->rtt.min > thresh) {
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPHYSTARTDELAYDETECT);
				NET_ADD_STATS(sock_net(sk),
					      LINUX_MIB_TCPHYSTARTDELAYCWND,
					      tcp_snd_cwnd(tp));
				tp->snd_ssthresh = tcp_snd_cwnd(tp);
			}
		}
	}
}

static s32 tcp_cdg_grad(struct cdg *ca)
{
	s32 gmin = ca->rtt.min - ca->rtt_prev.min;
	s32 gmax = ca->rtt.max - ca->rtt_prev.max;
	s32 grad;

	if (ca->gradients) {
		ca->gsum.min += gmin - ca->gradients[ca->tail].min;
		ca->gsum.max += gmax - ca->gradients[ca->tail].max;
		ca->gradients[ca->tail].min = gmin;
		ca->gradients[ca->tail].max = gmax;
		ca->tail = (ca->tail + 1) & (window - 1);
		gmin = ca->gsum.min;
		gmax = ca->gsum.max;
	}

	/* We keep sums to ignore gradients during cwnd reductions;
	 * the paper's smoothed gradients otherwise simplify to:
	 * (rtt_latest - rtt_oldest) / window.
	 *
	 * We also drop division by window here.
	 */
	grad = gmin > 0 ? gmin : gmax;

	/* Extrapolate missing values in gradient window: */
	if (!ca->gfilled) {
		if (!ca->gradients && window > 1)
			grad *= window; /* Memory allocation failed. */
		else if (ca->tail == 0)
			ca->gfilled = true;
		else
			grad = (grad * window) / (int)ca->tail;
	}

	/* Backoff was effectual: */
	if (gmin <= -32 || gmax <= -32)
		ca->backoff_cnt = 0;

	if (use_tolerance) {
		/* Reduce small variations to zero: */
		gmin = DIV_ROUND_CLOSEST(gmin, 64);
		gmax = DIV_ROUND_CLOSEST(gmax, 64);

		if (gmin > 0 && gmax <= 0)
			ca->state = CDG_FULL;
		else if ((gmin > 0 && gmax > 0) || gmax < 0)
			ca->state = CDG_NONFULL;
	}
	return grad;
}

static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (get_random_u32() <= nexp_u32(grad * backoff_factor))
		return false;

	if (use_ineff) {
		ca->backoff_cnt++;
		if (ca->backoff_cnt > use_ineff)
			return false;
	}

	ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
	ca->state = CDG_BACKOFF;
	tcp_enter_cwr(sk);
	return true;
}

/* Not called in CWR or Recovery state. */
static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 prior_snd_cwnd;
	u32 incr;

	if (tcp_in_slow_start(tp) && hystart_detect)
		tcp_cdg_hystart_update(sk);

	if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
		s32 grad = 0;

		if (ca->rtt_prev.v64)
			grad = tcp_cdg_grad(ca);
		ca->rtt_seq = tp->snd_nxt;
		ca->rtt_prev = ca->rtt;
		ca->rtt.v64 = 0;
		ca->last_ack = 0;
		ca->sample_cnt = 0;

		if (grad > 0 && tcp_cdg_backoff(sk, grad))
			return;
	}

	if (!tcp_is_cwnd_limited(sk)) {
		ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
		return;
	}

	prior_snd_cwnd = tcp_snd_cwnd(tp);
	tcp_reno_cong_avoid(sk, ack, acked);
	incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
	ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
}

static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (sample->rtt_us <= 0)
		return;

	/* A heuristic for filtering delayed ACKs, adapted from:
	 * D.A. Hayes. "Timing enhancements to the FreeBSD kernel to support
	 * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010.
	 */
	if (tp->sacked_out == 0) {
		if (sample->pkts_acked == 1 && ca->delack) {
			/* A delayed ACK is only used for the minimum if it is
			 * provenly lower than an existing non-zero minimum.
			 */
			ca->rtt.min = min(ca->rtt.min, sample->rtt_us);
			ca->delack--;
			return;
		} else if (sample->pkts_acked > 1 && ca->delack < 5) {
			ca->delack++;
		}
	}

	ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us);
	ca->rtt.max = max(ca->rtt.max, sample->rtt_us);
}

static u32 tcp_cdg_ssthresh(struct sock *sk)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (ca->state == CDG_BACKOFF)
		return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);

	if (ca->state == CDG_NONFULL && use_tolerance)
		return tcp_snd_cwnd(tp);

	ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
	if (use_shadow)
		return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
	return max(2U, tcp_snd_cwnd(tp) >> 1);
}

static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct cdg_minmax *gradients;

	switch (ev) {
	case CA_EVENT_CWND_RESTART:
		gradients = ca->gradients;
		if (gradients)
			memset(gradients, 0, window * sizeof(gradients[0]));
		memset(ca, 0, sizeof(*ca));

		ca->gradients = gradients;
		ca->rtt_seq = tp->snd_nxt;
		ca->shadow_wnd = tcp_snd_cwnd(tp);
		break;
	case CA_EVENT_COMPLETE_CWR:
		ca->state = CDG_UNKNOWN;
		ca->rtt_seq = tp->snd_nxt;
		ca->rtt_prev = ca->rtt;
		ca->rtt.v64 = 0;
		break;
	default:
		break;
	}
}

static void tcp_cdg_init(struct sock *sk)
{
	struct cdg *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	ca->gradients = NULL;
	/* We silently fall back to window = 1 if allocation fails. */
	if (window > 1)
		ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
					GFP_NOWAIT | __GFP_NOWARN);
	ca->rtt_seq = tp->snd_nxt;
	ca->shadow_wnd = tcp_snd_cwnd(tp);
}

static void tcp_cdg_release(struct sock *sk)
{
	struct cdg *ca = inet_csk_ca(sk);

	kfree(ca->gradients);
	ca->gradients = NULL;
}

static struct tcp_congestion_ops tcp_cdg __read_mostly = {
	.cong_avoid = tcp_cdg_cong_avoid,
	.cwnd_event = tcp_cdg_cwnd_event,
	.pkts_acked = tcp_cdg_acked,
	.undo_cwnd = tcp_reno_undo_cwnd,
	.ssthresh = tcp_cdg_ssthresh,
	.release = tcp_cdg_release,
	.init = tcp_cdg_init,
	.owner = THIS_MODULE,
	.name = "cdg",
};

static int __init tcp_cdg_register(void)
{
	if (backoff_beta > 1024 || window < 1 || window > 256)
		return -ERANGE;
	if (!is_power_of_2(window))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct cdg) > ICSK_CA_PRIV_SIZE);
	tcp_register_congestion_control(&tcp_cdg);
	return 0;
}

static void __exit tcp_cdg_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_cdg);
}

module_init(tcp_cdg_register);
module_exit(tcp_cdg_unregister);
MODULE_AUTHOR("Kenneth Klette Jonassen");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP CDG");
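For intuition about tcp_cdg_backoff(): it backs off when a uniform random u32 exceeds nexp_u32(grad * backoff_factor), i.e. with probability 1 - exp(-grad_us * backoff_factor * 1e-6). Note that the default backoff_factor of 42 is consistent with the conversion stated in the header, backoff_factor = 1000/(G * window), for example with G = 3 and window = 8 (1000/24 ≈ 42). A small stand-alone check of that formula in plain user-space C (compile with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
	const unsigned int backoff_factor = 42;	/* module default */
	const int grads_us[] = { 100, 1000, 5000, 20000 };

	for (int i = 0; i < 4; i++) {
		/* P(backoff) for a given summed gradient in microseconds */
		double p = 1.0 - exp(-1e-6 * grads_us[i] * backoff_factor);

		printf("gradient %5d us -> backoff probability %.3f\n",
		       grads_us[i], p);
	}
	return 0;
}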
#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

#include "blk-cgroup-rwstat.h"

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued. When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * of a local or child group which can queue many bios at once filling up
 * the list starving others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from. When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's. A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* service_queue->queued[] */
	struct bio_list		bios;		/* queued bios */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};

struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};

enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_CANCELING	= 1 << 2,	/* starts to cancel bio */
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children. qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when group
	 * will unthrottle and is ready to dispatch more bio. It is used as
	 * key to sort active groups in service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules_bps[2];
	bool has_rules_iops[2];

	/* bytes per second rate limits */
	uint64_t bps[2];

	/* IOPS limits */
	unsigned int iops[2];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	/*
	 * The following two fields are updated when new configuration is
	 * submitted while some bios are still throttled, they record how many
	 * bytes/ios are waited already in previous configuration, and they will
	 * be used to calculate wait time under new configuration.
	 */
	long long carryover_bytes[2];
	int carryover_ios[2];

	unsigned long last_check_time;

	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

extern struct blkcg_policy blkcg_policy_throtl;

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

/*
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
static inline void blk_throtl_exit(struct gendisk *disk) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
void blk_throtl_exit(struct gendisk *disk);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct gendisk *disk);

static inline bool blk_throtl_activated(struct request_queue *q)
{
	return q->td != NULL;
}

static inline bool blk_should_throtl(struct bio *bio)
{
	struct throtl_grp *tg;
	int rw = bio_data_dir(bio);

	/*
	 * This is called under bio_queue_enter(), and it's synchronized with
	 * the activation of blk-throtl, which is protected by
	 * blk_mq_freeze_queue().
	 */
	if (!blk_throtl_activated(bio->bi_bdev->bd_queue))
		return false;

	tg = blkg_to_tg(bio->bi_blkg);
	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
			bio_set_flag(bio, BIO_CGROUP_ACCT);
			blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		}
		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
	}

	/* iops limit is always counted */
	if (tg->has_rules_iops[rw])
		return true;

	if (tg->has_rules_bps[rw] && !bio_flagged(bio, BIO_BPS_THROTTLED))
		return true;

	return false;
}

static inline bool blk_throtl_bio(struct bio *bio)
{
	if (!blk_should_throtl(bio))
		return false;

	return __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */
#endif
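The round-robin idea from the header comment can be sketched in a few lines of stand-alone C. This is a toy illustration with made-up types, not the kernel implementation: each source keeps its own queue, and each dispatch turn serves the next source, so a source with many queued "bios" cannot starve the others.

#include <stdio.h>

/* Toy stand-in for a qnode: one FIFO of "bios" per source. */
struct toy_qnode {
	const char *source;
	int pending;
};

/* Serve one bio per source per turn so no source hogs the window. */
static void dispatch_round_robin(struct toy_qnode *q, int n, int turns)
{
	for (int head = 0; turns--; head = (head + 1) % n) {
		if (q[head].pending) {
			q[head].pending--;
			printf("dispatched from %s (%d left)\n",
			       q[head].source, q[head].pending);
		}
	}
}

int main(void)
{
	struct toy_qnode q[] = {
		{ "qnode_on_self",		1 },
		{ "child A qnode_on_parent",	5 },
		{ "child B qnode_on_parent",	2 },
	};

	dispatch_round_robin(q, 3, 9);
	return 0;
}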
// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>
#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>
#include <linux/local_lock.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_TRACING
/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */
void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
	trace_mmap_lock_start_locking(mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success)
{
	trace_mmap_lock_acquire_returned(mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
	trace_mmap_lock_released(mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);
#endif /* CONFIG_TRACING */
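With CONFIG_TRACING enabled, the three tracepoints above show up under the mmap_lock event group in tracefs. A minimal user-space sketch that enables them and streams the trace follows; it assumes tracefs is mounted at /sys/kernel/tracing and that the process has sufficient privileges.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed tracefs layout; adjust if tracefs is mounted elsewhere. */
	int fd = open("/sys/kernel/tracing/events/mmap_lock/enable", O_WRONLY);
	char buf[4096];
	ssize_t n;

	if (fd < 0) {
		perror("open enable");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {	/* enable all three mmap_lock events */
		perror("write");
		return 1;
	}
	close(fd);

	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}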
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4, as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (C) 2018 ARM Limited or its affiliates.
 * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/module.h>
#include <linux/unaligned.h>
#include <crypto/sm4.h>

static const u32 ____cacheline_aligned fk[4] = {
	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};

static const u32 ____cacheline_aligned ck[32] = {
	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};

static const u8 ____cacheline_aligned sbox[256] = {
	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};

extern const u32 crypto_sm4_fk[4] __alias(fk);
extern const u32 crypto_sm4_ck[32] __alias(ck);
extern const u8 crypto_sm4_sbox[256] __alias(sbox);

EXPORT_SYMBOL(crypto_sm4_fk);
EXPORT_SYMBOL(crypto_sm4_ck);
EXPORT_SYMBOL(crypto_sm4_sbox);

static inline u32 sm4_t_non_lin_sub(u32 x)
{
	u32 out;

	out  = (u32)sbox[x & 0xff];
	out |= (u32)sbox[(x >> 8) & 0xff] << 8;
	out |= (u32)sbox[(x >> 16) & 0xff] << 16;
	out |= (u32)sbox[(x >> 24) & 0xff] << 24;

	return out;
}

static inline u32 sm4_key_lin_sub(u32 x)
{
	return x ^ rol32(x, 13) ^ rol32(x, 23);
}

static inline u32 sm4_enc_lin_sub(u32 x)
{
	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
}

static inline u32 sm4_key_sub(u32 x)
{
	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
}

static inline u32 sm4_enc_sub(u32 x)
{
	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
}

static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
{
	return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
}

/**
 * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
 * @ctx:	The location where the computed key will be stored.
 * @in_key:	The supplied key.
 * @key_len:	The length of the supplied key.
 *
 * Returns 0 on success. The function fails only if an invalid key size (or
 * pointer) is supplied.
 */
int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
		  unsigned int key_len)
{
	u32 rk[4];
	const u32 *key = (u32 *)in_key;
	int i;

	if (key_len != SM4_KEY_SIZE)
		return -EINVAL;

	rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
	rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
	rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
	rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];

	for (i = 0; i < 32; i += 4) {
		rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
		rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
		rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
		rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);

		ctx->rkey_enc[i + 0] = rk[0];
		ctx->rkey_enc[i + 1] = rk[1];
		ctx->rkey_enc[i + 2] = rk[2];
		ctx->rkey_enc[i + 3] = rk[3];
		ctx->rkey_dec[31 - 0 - i] = rk[0];
		ctx->rkey_dec[31 - 1 - i] = rk[1];
		ctx->rkey_dec[31 - 2 - i] = rk[2];
		ctx->rkey_dec[31 - 3 - i] = rk[3];
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sm4_expandkey);

/**
 * sm4_crypt_block - Encrypt or decrypt a single SM4 block
 * @rk:		The rkey_enc for encrypt or rkey_dec for decrypt
 * @out:	Buffer to store output data
 * @in:		Buffer containing the input data
 */
void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
{
	u32 x[4], i;

	x[0] = get_unaligned_be32(in + 0 * 4);
	x[1] = get_unaligned_be32(in + 1 * 4);
	x[2] = get_unaligned_be32(in + 2 * 4);
	x[3] = get_unaligned_be32(in + 3 * 4);

	for (i = 0; i < 32; i += 4) {
		x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
		x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
		x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
		x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
	}

	put_unaligned_be32(x[3 - 0], out + 0 * 4);
	put_unaligned_be32(x[3 - 1], out + 1 * 4);
	put_unaligned_be32(x[3 - 2], out + 2 * 4);
	put_unaligned_be32(x[3 - 3], out + 3 * 4);
}
EXPORT_SYMBOL_GPL(sm4_crypt_block);

MODULE_DESCRIPTION("Generic SM4 library");
MODULE_LICENSE("GPL v2");
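A usage sketch for the two exported helpers, in a hypothetical test-module context. The vector is the single-block example from the SM4 specification: encrypting the key itself should produce 68 1e df 34 d2 06 96 5e 86 b3 e9 4f 53 6e 42 46.

#include <linux/module.h>
#include <crypto/sm4.h>

static int __init sm4_demo_init(void)
{
	static const u8 key[SM4_KEY_SIZE] = {
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
	};
	struct sm4_ctx ctx;
	u8 ct[SM4_BLOCK_SIZE];

	if (sm4_expandkey(&ctx, key, sizeof(key)))
		return -EINVAL;

	sm4_crypt_block(ctx.rkey_enc, ct, key);	/* encrypt one block */
	print_hex_dump_bytes("sm4 ct: ", DUMP_PREFIX_NONE, ct, sizeof(ct));

	sm4_crypt_block(ctx.rkey_dec, ct, ct);	/* decrypt in place */
	return 0;
}
module_init(sm4_demo_init);
MODULE_DESCRIPTION("SM4 single-block demo");
MODULE_LICENSE("GPL");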
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Public Key Signature Algorithm
 *
 * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <crypto/internal/sig.h>
#include <linux/cryptouser.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <net/netlink.h>

#include "internal.h"

static void crypto_sig_exit_tfm(struct crypto_tfm *tfm)
{
	struct crypto_sig *sig = __crypto_sig_tfm(tfm);
	struct sig_alg *alg = crypto_sig_alg(sig);

	alg->exit(sig);
}

static int crypto_sig_init_tfm(struct crypto_tfm *tfm)
{
	struct crypto_sig *sig = __crypto_sig_tfm(tfm);
	struct sig_alg *alg = crypto_sig_alg(sig);

	if (alg->exit)
		sig->base.exit = crypto_sig_exit_tfm;

	if (alg->init)
		return alg->init(sig);

	return 0;
}

static void crypto_sig_free_instance(struct crypto_instance *inst)
{
	struct sig_instance *sig = sig_instance(inst);

	sig->free(sig);
}

static void __maybe_unused crypto_sig_show(struct seq_file *m,
					   struct crypto_alg *alg)
{
	seq_puts(m, "type         : sig\n");
}

static int __maybe_unused crypto_sig_report(struct sk_buff *skb,
					    struct crypto_alg *alg)
{
	struct crypto_report_sig rsig = {};

	strscpy(rsig.type, "sig", sizeof(rsig.type));

	return nla_put(skb, CRYPTOCFGA_REPORT_SIG, sizeof(rsig), &rsig);
}

static const struct crypto_type crypto_sig_type = {
	.extsize = crypto_alg_extsize,
	.init_tfm = crypto_sig_init_tfm,
	.free = crypto_sig_free_instance,
#ifdef CONFIG_PROC_FS
	.show = crypto_sig_show,
#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
	.report = crypto_sig_report,
#endif
	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
	.maskset = CRYPTO_ALG_TYPE_MASK,
	.type = CRYPTO_ALG_TYPE_SIG,
	.tfmsize = offsetof(struct crypto_sig, base),
};

struct crypto_sig *crypto_alloc_sig(const char *alg_name, u32 type, u32 mask)
{
	return crypto_alloc_tfm(alg_name, &crypto_sig_type, type, mask);
}
EXPORT_SYMBOL_GPL(crypto_alloc_sig);

static int sig_default_sign(struct crypto_sig *tfm,
			    const void *src, unsigned int slen,
			    void *dst, unsigned int dlen)
{
	return -ENOSYS;
}

static int sig_default_verify(struct crypto_sig *tfm,
			      const void *src, unsigned int slen,
			      const void *dst, unsigned int dlen)
{
	return -ENOSYS;
}

static int sig_default_set_key(struct crypto_sig *tfm,
			       const void *key, unsigned int keylen)
{
	return -ENOSYS;
}

static int sig_prepare_alg(struct sig_alg *alg)
{
	struct crypto_alg *base = &alg->base;

	if (!alg->sign)
		alg->sign = sig_default_sign;
	if (!alg->verify)
		alg->verify = sig_default_verify;
	if (!alg->set_priv_key)
		alg->set_priv_key = sig_default_set_key;
	if (!alg->set_pub_key)
		return -EINVAL;
	if (!alg->key_size)
		return -EINVAL;
	if (!alg->max_size)
		alg->max_size = alg->key_size;
	if (!alg->digest_size)
		alg->digest_size = alg->key_size;

	base->cra_type = &crypto_sig_type;
	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
	base->cra_flags |= CRYPTO_ALG_TYPE_SIG;

	return 0;
}

int crypto_register_sig(struct sig_alg *alg)
{
	struct crypto_alg *base = &alg->base;
	int err;

	err = sig_prepare_alg(alg);
	if (err)
		return err;

	return crypto_register_alg(base);
}
EXPORT_SYMBOL_GPL(crypto_register_sig);

void crypto_unregister_sig(struct sig_alg *alg)
{
	crypto_unregister_alg(&alg->base);
}
EXPORT_SYMBOL_GPL(crypto_unregister_sig);

int sig_register_instance(struct crypto_template *tmpl,
			  struct sig_instance *inst)
{
	int err;

	if (WARN_ON(!inst->free))
		return -EINVAL;

	err = sig_prepare_alg(&inst->alg);
	if (err)
		return err;

	return crypto_register_instance(tmpl, sig_crypto_instance(inst));
}
EXPORT_SYMBOL_GPL(sig_register_instance);

int crypto_grab_sig(struct crypto_sig_spawn *spawn,
		    struct crypto_instance *inst, const char *name,
		    u32 type, u32 mask)
{
	spawn->base.frontend = &crypto_sig_type;
	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
}
EXPORT_SYMBOL_GPL(crypto_grab_sig);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Public Key Signature Algorithms");
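A caller-side sketch of this interface (not from this file), using the accessor helpers declared in include/crypto/sig.h; the algorithm name is illustrative and assumes an ECDSA sig implementation is registered.

#include <linux/err.h>
#include <crypto/sig.h>

int demo_verify(const void *key, unsigned int keylen,
		const void *sig, unsigned int siglen,
		const void *digest, unsigned int dlen)
{
	struct crypto_sig *tfm;
	int err;

	/* Algorithm name is illustrative; any registered sig alg works. */
	tfm = crypto_alloc_sig("ecdsa-nist-p256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_sig_set_pubkey(tfm, key, keylen);
	if (!err)
		err = crypto_sig_verify(tfm, sig, siglen, digest, dlen);

	crypto_free_sig(tfm);
	return err;
}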
// SPDX-License-Identifier: GPL-2.0
/*
 * bus.c - bus driver management
 *
 * Copyright (c) 2002-3 Patrick Mochel
 * Copyright (c) 2002-3 Open Source Development Labs
 * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de>
 * Copyright (c) 2007 Novell Inc.
 * Copyright (c) 2023 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 */

#include <linux/async.h>
#include <linux/device/bus.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/mutex.h>
#include <linux/sysfs.h>
#include "base.h"
#include "power/power.h"

/* /sys/devices/system */
static struct kset *system_kset;

/* /sys/bus */
static struct kset *bus_kset;

#define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr)

/*
 * sysfs bindings for drivers
 */

#define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr)

#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \
	struct driver_attribute driver_attr_##_name =		\
		__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)

static int __must_check bus_rescan_devices_helper(struct device *dev,
						  void *data);

/**
 * bus_to_subsys - Turn a struct bus_type into a struct subsys_private
 * @bus: pointer to the struct bus_type to look up
 *
 * The driver core internals needs to work on the subsys_private structure, not
 * the external struct bus_type pointer. This function walks the list of
 * registered busses in the system and finds the matching one and returns the
 * internal struct subsys_private that relates to that bus.
 *
 * Note, the reference count of the return value is INCREMENTED if it is not
 * NULL. A call to subsys_put() must be done when finished with the pointer in
 * order for it to be properly freed.
 */
static struct subsys_private *bus_to_subsys(const struct bus_type *bus)
{
	struct subsys_private *sp = NULL;
	struct kobject *kobj;

	if (!bus || !bus_kset)
		return NULL;

	spin_lock(&bus_kset->list_lock);

	if (list_empty(&bus_kset->list))
		goto done;

	list_for_each_entry(kobj, &bus_kset->list, entry) {
		struct kset *kset = container_of(kobj, struct kset, kobj);

		sp = container_of_const(kset, struct subsys_private, subsys);
		if (sp->bus == bus)
			goto done;
	}
	sp = NULL;
done:
	sp = subsys_get(sp);
	spin_unlock(&bus_kset->list_lock);
	return sp;
}

static const struct bus_type *bus_get(const struct bus_type *bus)
{
	struct subsys_private *sp = bus_to_subsys(bus);

	if (sp)
		return bus;
	return NULL;
}

static void bus_put(const struct bus_type *bus)
{
	struct subsys_private *sp = bus_to_subsys(bus);

	/* two puts are required as the call to bus_to_subsys incremented it again */
	subsys_put(sp);
	subsys_put(sp);
}

static ssize_t drv_attr_show(struct kobject *kobj, struct attribute *attr,
			     char *buf)
{
	struct driver_attribute *drv_attr = to_drv_attr(attr);
	struct driver_private *drv_priv = to_driver(kobj);
	ssize_t ret = -EIO;

	if (drv_attr->show)
		ret = drv_attr->show(drv_priv->driver, buf);
	return ret;
}

static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr,
			      const char *buf, size_t count)
{
	struct driver_attribute *drv_attr = to_drv_attr(attr);
	struct driver_private *drv_priv = to_driver(kobj);
	ssize_t ret = -EIO;

	if (drv_attr->store)
		ret = drv_attr->store(drv_priv->driver, buf, count);
	return ret;
}

static const struct sysfs_ops driver_sysfs_ops = {
	.show	= drv_attr_show,
	.store	= drv_attr_store,
};

static void driver_release(struct kobject *kobj)
{
	struct driver_private *drv_priv = to_driver(kobj);

	pr_debug("driver: '%s': %s\n", kobject_name(kobj), __func__);
	kfree(drv_priv);
}

static const struct kobj_type driver_ktype = {
	.sysfs_ops	= &driver_sysfs_ops,
	.release	= driver_release,
};

/*
 * sysfs bindings for buses
 */
static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr,
			     char *buf)
{
	struct bus_attribute *bus_attr = to_bus_attr(attr);
	struct subsys_private *subsys_priv = to_subsys_private(kobj);
	/* return -EIO for reading a bus attribute without show() */
	ssize_t ret = -EIO;

	if (bus_attr->show)
		ret = bus_attr->show(subsys_priv->bus, buf);
	return ret;
}

static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr,
			      const char *buf, size_t count)
{
	struct bus_attribute *bus_attr = to_bus_attr(attr);
	struct subsys_private *subsys_priv = to_subsys_private(kobj);
	/* return -EIO for writing a bus attribute without store() */
	ssize_t ret = -EIO;

	if (bus_attr->store)
		ret = bus_attr->store(subsys_priv->bus, buf, count);
	return ret;
}

static const struct sysfs_ops bus_sysfs_ops = {
	.show	= bus_attr_show,
	.store	= bus_attr_store,
};

int bus_create_file(const struct bus_type *bus, struct bus_attribute *attr)
{
	struct subsys_private *sp = bus_to_subsys(bus);
	int error;

	if (!sp)
		return -EINVAL;

	error = sysfs_create_file(&sp->subsys.kobj, &attr->attr);

	subsys_put(sp);
	return error;
}
EXPORT_SYMBOL_GPL(bus_create_file);

void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr)
{
	struct subsys_private *sp = bus_to_subsys(bus);

	if (!sp)
		return;

	sysfs_remove_file(&sp->subsys.kobj, &attr->attr);
	subsys_put(sp);
}
EXPORT_SYMBOL_GPL(bus_remove_file);

static void bus_release(struct kobject *kobj)
{
	struct subsys_private *priv = to_subsys_private(kobj);

	lockdep_unregister_key(&priv->lock_key);
	kfree(priv);
}

static const struct kobj_type bus_ktype = {
	.sysfs_ops	= &bus_sysfs_ops,
	.release	= bus_release,
};

static int bus_uevent_filter(const struct kobject *kobj)
{
	const struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &bus_ktype)
		return 1;
	return 0;
}

static const struct kset_uevent_ops bus_uevent_ops = {
	.filter = bus_uevent_filter,
};

/* Manually detach a device from its associated driver. */
static ssize_t unbind_store(struct device_driver *drv, const char *buf,
			    size_t count)
{
	const struct bus_type *bus = bus_get(drv->bus);
	struct device *dev;
	int err = -ENODEV;

	dev = bus_find_device_by_name(bus, NULL, buf);
	if (dev && dev->driver == drv) {
		device_driver_detach(dev);
		err = count;
	}
	put_device(dev);
	bus_put(bus);
	return err;
}
static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store);

/*
 * Manually attach a device to a driver.
 * Note: the driver must want to bind to the device,
 * it is not possible to override the driver's id table.
 */
static ssize_t bind_store(struct device_driver *drv, const char *buf,
			  size_t count)
{
	const struct bus_type *bus = bus_get(drv->bus);
	struct device *dev;
	int err = -ENODEV;

	dev = bus_find_device_by_name(bus, NULL, buf);
	if (dev && driver_match_device(drv, dev)) {
		err = device_driver_attach(drv, dev);
		if (!err) {
			/* success */
			err = count;
		}
	}
	put_device(dev);
	bus_put(bus);
	return err;
}
static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store);

static ssize_t drivers_autoprobe_show(const struct bus_type *bus, char *buf)
{
	struct subsys_private *sp = bus_to_subsys(bus);
	int ret;

	if (!sp)
		return -EINVAL;

	ret = sysfs_emit(buf, "%d\n", sp->drivers_autoprobe);
	subsys_put(sp);
	return ret;
}

static ssize_t drivers_autoprobe_store(const struct bus_type *bus,
				       const char *buf, size_t count)
{
	struct subsys_private *sp = bus_to_subsys(bus);

	if (!sp)
		return -EINVAL;

	if (buf[0] == '0')
		sp->drivers_autoprobe = 0;
	else
		sp->drivers_autoprobe = 1;

	subsys_put(sp);
	return count;
}

static ssize_t drivers_probe_store(const struct bus_type *bus,
				   const char *buf, size_t count)
{
	struct device *dev;
	int err = -EINVAL;

	dev = bus_find_device_by_name(bus, NULL, buf);
	if (!dev)
		return -ENODEV;
	if (bus_rescan_devices_helper(dev, NULL) == 0)
		err = count;
	put_device(dev);
	return err;
}

static struct device *next_device(struct klist_iter *i)
{
	struct klist_node *n = klist_next(i);
	struct device *dev = NULL;
	struct device_private *dev_prv;

	if (n) {
		dev_prv = to_device_private_bus(n);
		dev = dev_prv->device;
	}
	return dev;
}

/**
 * bus_for_each_dev - device iterator.
 * @bus: bus type.
 * @start: device to start iterating from.
 * @data: data for the callback.
 * @fn: function to be called for each device.
 *
 * Iterate over @bus's list of devices, and call @fn for each,
 * passing it @data. If @start is not NULL, we use that device to
 * begin iterating from.
 *
 * We check the return of @fn each time. If it returns anything
 * other than 0, we break out and return that value.
 *
 * NOTE: The device that returns a non-zero value is not retained
 * in any way, nor is its refcount incremented. If the caller needs
 * to retain this data, it should do so, and increment the reference
 * count in the supplied callback.
 */
int bus_for_each_dev(const struct bus_type *bus, struct device *start,
		     void *data, device_iter_t fn)
{
	struct subsys_private *sp = bus_to_subsys(bus);
	struct klist_iter i;
	struct device *dev;
	int error = 0;

	if (!sp)
		return -EINVAL;

	klist_iter_init_node(&sp->klist_devices, &i,
			     (start ? &start->p->knode_bus : NULL));
	while (!error && (dev = next_device(&i)))
		error = fn(dev, data);
	klist_iter_exit(&i);
	subsys_put(sp);
	return error;
}
EXPORT_SYMBOL_GPL(bus_for_each_dev);
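A usage sketch for the iterator above (hypothetical caller, not from this file): count the devices registered on a bus with a plain callback that matches the device_iter_t signature.

/* Counts devices on @bus; a non-zero return from the callback would
 * stop the iteration early, so we always return 0 from it.
 */
static int count_one(struct device *dev, void *data)
{
	int *count = data;

	(*count)++;
	return 0;
}

static int count_devices_on_bus(const struct bus_type *bus)
{
	int count = 0;
	int err;

	err = bus_for_each_dev(bus, NULL, &count, count_one);
	return err ? err : count;
}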
*/ struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, device_match_t match) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; if (!sp) return NULL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while ((dev = next_device(&i))) { if (match(dev, data)) { get_device(dev); break; } } klist_iter_exit(&i); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device); static struct device_driver *next_driver(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct driver_private *drv_priv; if (n) { drv_priv = container_of(n, struct driver_private, knode_bus); return drv_priv->driver; } return NULL; } /** * bus_for_each_drv - driver iterator * @bus: bus we're dealing with. * @start: driver to start iterating on. * @data: data to pass to the callback. * @fn: function to call for each driver. * * This is nearly identical to the device iterator above. * We iterate over each driver that belongs to @bus, and call * @fn for each. If @fn returns anything but 0, we break out * and return it. If @start is not NULL, we use it as the head * of the list. * * NOTE: we don't return the driver that returns a non-zero * value, nor do we leave the reference count incremented for that * driver. If the caller needs to know that info, it must set it * in the callback. It must also be sure to increment the refcount * so it doesn't disappear before returning to the caller. */ int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device_driver *drv; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_drivers, &i, start ? &start->p->knode_bus : NULL); while ((drv = next_driver(&i)) && !error) error = fn(drv, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); /** * bus_add_device - add device to bus * @dev: device being added * * - Add device's bus attributes. * - Create links to device's bus. * - Add the device to its bus's list of devices. */ int bus_add_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); int error; if (!sp) { /* * This is a normal operation for many devices that do not * have a bus assigned to them, just say that all went * well. */ return 0; } /* * Reference in sp is now incremented and will be dropped when * the device is removed from the bus */ pr_debug("bus: '%s': add device %s\n", sp->bus->name, dev_name(dev)); error = device_add_groups(dev, sp->bus->dev_groups); if (error) goto out_put; error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); if (error) goto out_groups; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_subsys; klist_add_tail(&dev->p->knode_bus, &sp->klist_devices); return 0; out_subsys: sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); out_groups: device_remove_groups(dev, sp->bus->dev_groups); out_put: subsys_put(sp); return error; } /** * bus_probe_device - probe drivers for a new device * @dev: device to probe * * - Automatically probe for a driver if the bus allows it. 
*/ void bus_probe_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; if (sp->drivers_autoprobe) device_initial_probe(dev); mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->add_dev) sif->add_dev(dev, sif); mutex_unlock(&sp->mutex); subsys_put(sp); } /** * bus_remove_device - remove device from bus * @dev: device to be removed * * - Remove device from all interfaces. * - Remove symlink from bus' directory. * - Delete device from bus's list. * - Detach from its driver. * - Drop reference taken in bus_add_device(). */ void bus_remove_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->remove_dev) sif->remove_dev(dev, sif); mutex_unlock(&sp->mutex); sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus); pr_debug("bus: '%s': remove device %s\n", dev->bus->name, dev_name(dev)); device_release_driver(dev); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_device() */ subsys_put(sp); subsys_put(sp); } static int __must_check add_bind_files(struct device_driver *drv) { int ret; ret = driver_create_file(drv, &driver_attr_unbind); if (ret == 0) { ret = driver_create_file(drv, &driver_attr_bind); if (ret) driver_remove_file(drv, &driver_attr_unbind); } return ret; } static void remove_bind_files(struct device_driver *drv) { driver_remove_file(drv, &driver_attr_bind); driver_remove_file(drv, &driver_attr_unbind); } static BUS_ATTR_WO(drivers_probe); static BUS_ATTR_RW(drivers_autoprobe); static int add_probe_files(const struct bus_type *bus) { int retval; retval = bus_create_file(bus, &bus_attr_drivers_probe); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_drivers_autoprobe); if (retval) bus_remove_file(bus, &bus_attr_drivers_probe); out: return retval; } static void remove_probe_files(const struct bus_type *bus) { bus_remove_file(bus, &bus_attr_drivers_autoprobe); bus_remove_file(bus, &bus_attr_drivers_probe); } static ssize_t uevent_store(struct device_driver *drv, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&drv->p->kobj, buf, count); return rc ? rc : count; } static DRIVER_ATTR_WO(uevent); /** * bus_add_driver - Add a driver to the bus. * @drv: driver. 
*/ int bus_add_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); struct driver_private *priv; int error = 0; if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the driver is removed from the bus */ pr_debug("bus: '%s': add driver %s\n", sp->bus->name, drv->name); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { error = -ENOMEM; goto out_put_bus; } klist_init(&priv->klist_devices, NULL, NULL); priv->driver = drv; drv->p = priv; priv->kobj.kset = sp->drivers_kset; error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name); if (error) goto out_unregister; klist_add_tail(&priv->knode_bus, &sp->klist_drivers); if (sp->drivers_autoprobe) { error = driver_attach(drv); if (error) goto out_del_list; } error = module_add_driver(drv->owner, drv); if (error) { printk(KERN_ERR "%s: failed to create module links for %s\n", __func__, drv->name); goto out_detach; } error = driver_create_file(drv, &driver_attr_uevent); if (error) { printk(KERN_ERR "%s: uevent attr (%s) failed\n", __func__, drv->name); } error = driver_add_groups(drv, sp->bus->drv_groups); if (error) { /* How the hell do we get out of this pickle? Give up */ printk(KERN_ERR "%s: driver_add_groups(%s) failed\n", __func__, drv->name); } if (!drv->suppress_bind_attrs) { error = add_bind_files(drv); if (error) { /* Ditto */ printk(KERN_ERR "%s: add_bind_files(%s) failed\n", __func__, drv->name); } } return 0; out_detach: driver_detach(drv); out_del_list: klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ drv->p = NULL; out_put_bus: subsys_put(sp); return error; } /** * bus_remove_driver - delete driver from bus's knowledge. * @drv: driver. * * Detach the driver from the devices it controls, and remove * it from its bus's list of drivers. Finally, we drop the reference * to the bus we took in bus_add_driver(). */ void bus_remove_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); if (!sp) return; pr_debug("bus: '%s': remove driver %s\n", sp->bus->name, drv->name); if (!drv->suppress_bind_attrs) remove_bind_files(drv); driver_remove_groups(drv, sp->bus->drv_groups); driver_remove_file(drv, &driver_attr_uevent); klist_remove(&drv->p->knode_bus); driver_detach(drv); module_remove_driver(drv); kobject_put(&drv->p->kobj); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_driver() */ subsys_put(sp); subsys_put(sp); } /* Helper for bus_rescan_devices's iter */ static int __must_check bus_rescan_devices_helper(struct device *dev, void *data) { int ret = 0; if (!dev->driver) { if (dev->parent && dev->bus->need_parent_lock) device_lock(dev->parent); ret = device_attach(dev); if (dev->parent && dev->bus->need_parent_lock) device_unlock(dev->parent); } return ret < 0 ? ret : 0; } /** * bus_rescan_devices - rescan devices on the bus for possible drivers * @bus: the bus to scan. * * This function will look for devices on the bus with no driver * attached and rescan them against existing drivers to see if they * match any, by calling device_attach() for the unbound devices.
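A hedged usage sketch of the rescan entry point that follows: a (hypothetical) caller that has just made new matches possible can ask the bus to retry binding its unbound devices:

static int foo_retry_binding(void)
{
	/* retries device_attach() for every unbound device on the bus */
	return bus_rescan_devices(&platform_bus_type);
}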
*/ int bus_rescan_devices(const struct bus_type *bus) { return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } EXPORT_SYMBOL_GPL(bus_rescan_devices); /** * device_reprobe - remove driver for a device and probe for a new driver * @dev: the device to reprobe * * This function detaches the attached driver (if any) for the given * device and restarts the driver probing process. It is intended * to be used if the probing criteria changed during a device's lifetime * and driver attachment should change accordingly. */ int device_reprobe(struct device *dev) { if (dev->driver) device_driver_detach(dev); return bus_rescan_devices_helper(dev, NULL); } EXPORT_SYMBOL_GPL(device_reprobe); static void klist_devices_get(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; get_device(dev); } static void klist_devices_put(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; put_device(dev); } static ssize_t bus_uevent_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = kobject_synth_uevent(&sp->subsys.kobj, buf, count); subsys_put(sp); if (ret) return ret; return count; } /* * "open code" the old BUS_ATTR() macro here. We want to use BUS_ATTR_WO() * here, but cannot use it because earlier in the file we have * DRIVER_ATTR_WO(uevent), which would cause a clash with the store * function name. */ static struct bus_attribute bus_attr_uevent = __ATTR(uevent, 0200, NULL, bus_uevent_store); /** * bus_register - register a driver-core subsystem * @bus: bus to register * * Once we have that, we register the bus with the kobject * infrastructure, then register the children subsystems it has: * the devices and drivers that belong to the subsystem.
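For illustration, a minimal (hypothetical) bus paired with bus_unregister() at module exit; only .name is shown here, while real buses normally also supply .match and .probe callbacks:

static const struct bus_type foo_bus_type = {
	.name = "foo",
};

static int __init foo_bus_init(void)
{
	return bus_register(&foo_bus_type);
}

static void __exit foo_bus_exit(void)
{
	bus_unregister(&foo_bus_type);
}

module_init(foo_bus_init);
module_exit(foo_bus_exit);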
*/ int bus_register(const struct bus_type *bus) { int retval; struct subsys_private *priv; struct kobject *bus_kobj; struct lock_class_key *key; priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->bus = bus; BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier); bus_kobj = &priv->subsys.kobj; retval = kobject_set_name(bus_kobj, "%s", bus->name); if (retval) goto out; bus_kobj->kset = bus_kset; bus_kobj->ktype = &bus_ktype; priv->drivers_autoprobe = 1; retval = kset_register(&priv->subsys); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_uevent); if (retval) goto bus_uevent_fail; priv->devices_kset = kset_create_and_add("devices", NULL, bus_kobj); if (!priv->devices_kset) { retval = -ENOMEM; goto bus_devices_fail; } priv->drivers_kset = kset_create_and_add("drivers", NULL, bus_kobj); if (!priv->drivers_kset) { retval = -ENOMEM; goto bus_drivers_fail; } INIT_LIST_HEAD(&priv->interfaces); key = &priv->lock_key; lockdep_register_key(key); __mutex_init(&priv->mutex, "subsys mutex", key); klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put); klist_init(&priv->klist_drivers, NULL, NULL); retval = add_probe_files(bus); if (retval) goto bus_probe_files_fail; retval = sysfs_create_groups(bus_kobj, bus->bus_groups); if (retval) goto bus_groups_fail; pr_debug("bus: '%s': registered\n", bus->name); return 0; bus_groups_fail: remove_probe_files(bus); bus_probe_files_fail: kset_unregister(priv->drivers_kset); bus_drivers_fail: kset_unregister(priv->devices_kset); bus_devices_fail: bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: kset_unregister(&priv->subsys); /* Above kset_unregister() will kfree @priv */ priv = NULL; out: kfree(priv); return retval; } EXPORT_SYMBOL_GPL(bus_register); /** * bus_unregister - remove a bus from the system * @bus: bus. * * Unregister the child subsystems and the bus itself. 
* Finally, we call bus_put() to release the refcount */ void bus_unregister(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *bus_kobj; if (!sp) return; pr_debug("bus: '%s': unregistering\n", bus->name); if (sp->dev_root) device_unregister(sp->dev_root); bus_kobj = &sp->subsys.kobj; sysfs_remove_groups(bus_kobj, bus->bus_groups); remove_probe_files(bus); bus_remove_file(bus, &bus_attr_uevent); kset_unregister(sp->drivers_kset); kset_unregister(sp->devices_kset); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_unregister); int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_register(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_register_notifier); int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_unregister(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_unregister_notifier); void bus_notify(struct device *dev, enum bus_notifier_event value) { struct subsys_private *sp = bus_to_subsys(dev->bus); if (!sp) return; blocking_notifier_call_chain(&sp->bus_notifier, value, dev); subsys_put(sp); } struct kset *bus_get_kset(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kset *kset; if (!sp) return NULL; kset = &sp->subsys; subsys_put(sp); return kset; } EXPORT_SYMBOL_GPL(bus_get_kset); /* * Yes, this forcibly breaks the klist abstraction temporarily. It * just wants to sort the klist, not change reference counts and * take/drop locks rapidly in the process. It does all this while * holding the lock for the list, so objects can't otherwise be * added/removed while we're swizzling. */ static void device_insertion_sort_klist(struct device *a, struct list_head *list, int (*compare)(const struct device *a, const struct device *b)) { struct klist_node *n; struct device_private *dev_prv; struct device *b; list_for_each_entry(n, list, n_node) { dev_prv = to_device_private_bus(n); b = dev_prv->device; if (compare(a, b) <= 0) { list_move_tail(&a->p->knode_bus.n_node, &b->p->knode_bus.n_node); return; } } list_move_tail(&a->p->knode_bus.n_node, list); } void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)) { struct subsys_private *sp = bus_to_subsys(bus); LIST_HEAD(sorted_devices); struct klist_node *n, *tmp; struct device_private *dev_prv; struct device *dev; struct klist *device_klist; if (!sp) return; device_klist = &sp->klist_devices; spin_lock(&device_klist->k_lock); list_for_each_entry_safe(n, tmp, &device_klist->k_list, n_node) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; device_insertion_sort_klist(dev, &sorted_devices, compare); } list_splice(&sorted_devices, &device_klist->k_list); spin_unlock(&device_klist->k_lock); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; /** * subsys_dev_iter_init - initialize subsys device iterator * @iter: subsys iterator to initialize * @sp: the subsys private (i.e. 
bus) we want to iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize subsys iterator @iter such that it iterates over devices * of @sp. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ static void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct subsys_private *sp, struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; if (start) start_knode = &start->p->knode_bus; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; } /** * subsys_dev_iter_next - iterate to the next device * @iter: subsys iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released until the * iterator proceeds to the next device or is exited. The caller is * free to do whatever it wants to do with the device including * calling back into subsys code. */ static struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) { struct klist_node *knode; struct device *dev; for (;;) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = to_device_private_bus(knode)->device; if (!iter->type || iter->type == dev->type) return dev; } } /** * subsys_dev_iter_exit - finish iteration * @iter: subsys iterator to finish * * Finish an iteration. Always call this function after iteration is * complete, whether or not the iteration ran to the end. */ static void subsys_dev_iter_exit(struct subsys_dev_iter *iter) { klist_iter_exit(&iter->ki); } int subsys_interface_register(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return -ENODEV; sp = bus_to_subsys(sif->subsys); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed from the bus */ mutex_lock(&sp->mutex); list_add_tail(&sif->node, &sp->interfaces); if (sif->add_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->add_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(subsys_interface_register); void subsys_interface_unregister(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return; sp = bus_to_subsys(sif->subsys); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&sif->node); if (sif->remove_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->remove_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in subsys_interface_register() */ subsys_put(sp); subsys_put(sp); } EXPORT_SYMBOL_GPL(subsys_interface_unregister); static void system_root_device_release(struct device *dev) { kfree(dev); } static int subsys_register(const struct bus_type *subsys, const struct attribute_group **groups, struct kobject *parent_of_root) { struct subsys_private *sp; struct device *dev; int err; err = bus_register(subsys); if (err < 0) return err; sp = bus_to_subsys(subsys); if (!sp) { err = -EINVAL; goto err_sp; } dev = kzalloc(sizeof(struct device), GFP_KERNEL); if
(!dev) { err = -ENOMEM; goto err_dev; } err = dev_set_name(dev, "%s", subsys->name); if (err < 0) goto err_name; dev->kobj.parent = parent_of_root; dev->groups = groups; dev->release = system_root_device_release; err = device_register(dev); if (err < 0) goto err_dev_reg; sp->dev_root = dev; subsys_put(sp); return 0; err_dev_reg: put_device(dev); dev = NULL; err_name: kfree(dev); err_dev: subsys_put(sp); err_sp: bus_unregister(subsys); return err; } /** * subsys_system_register - register a subsystem at /sys/devices/system/ * @subsys: system subsystem * @groups: default attributes for the root device * * All 'system' subsystems have a /sys/devices/system/<name> root device * with the name of the subsystem. The root device can carry subsystem- * wide attributes. All registered devices are below this single root * device and are named after the subsystem with a simple enumeration * number appended. The registered devices are not explicitly named; * only 'id' in the device needs to be set. * * Do not use this interface for anything new, it exists for compatibility * with bad ideas only. New subsystems should use plain subsystems, and * subsystem-wide attributes should be added to the subsystem directory * itself rather than to a fake root device placed in * /sys/devices/system/<name>. */ int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups) { return subsys_register(subsys, groups, &system_kset->kobj); } EXPORT_SYMBOL_GPL(subsys_system_register); /** * subsys_virtual_register - register a subsystem at /sys/devices/virtual/ * @subsys: virtual subsystem * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/virtual/<name> root device * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need a sysfs interface. */ int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups) { struct kobject *virtual_dir; virtual_dir = virtual_device_parent(); if (!virtual_dir) return -ENOMEM; return subsys_register(subsys, groups, virtual_dir); } EXPORT_SYMBOL_GPL(subsys_virtual_register); /** * driver_find - locate driver on a bus by its name. * @name: name of the driver. * @bus: bus to scan for the driver. * * Call kset_find_obj() to iterate over the list of drivers on * a bus to find the driver by name. Return the driver if found. * * This routine provides no locking to prevent the driver it returns * from being unregistered or unloaded while the caller is using it. * The caller is responsible for preventing this. */ struct device_driver *driver_find(const char *name, const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *k; struct driver_private *priv; if (!sp) return NULL; k = kset_find_obj(sp->drivers_kset, name); subsys_put(sp); if (!k) return NULL; priv = to_driver(k); /* Drop reference added by kset_find_obj() */ kobject_put(k); return priv->driver; } EXPORT_SYMBOL_GPL(driver_find); /* * Warning, the value could go to "removed" instantly after calling this function, so be very * careful when calling it...
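Circling back to driver_find() above, a hedged example with hypothetical names; per its kernel-doc, no reference is held on return, so the answer is only meaningful while the driver is known to stay loaded:

static bool foo_driver_present(void)
{
	/* returns the driver pointer without taking a reference */
	return driver_find("foo_drv", &platform_bus_type) != NULL;
}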
*/ bool bus_is_registered(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } /** * bus_get_dev_root - return a pointer to the "device root" of a bus * @bus: bus to return the device root of. * * If a bus has a "device root" structure, return it, WITH THE REFERENCE * COUNT INCREMENTED. * * Note, when finished with the device, a call to put_device() is required. * * If the device root is not present (or bus is not a valid pointer), NULL * will be returned. */ struct device *bus_get_dev_root(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct device *dev_root; if (!sp) return NULL; dev_root = get_device(sp->dev_root); subsys_put(sp); return dev_root; } EXPORT_SYMBOL_GPL(bus_get_dev_root); int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); if (!bus_kset) return -ENOMEM; system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); if (!system_kset) { /* Do error handling here as devices_init() does */ kset_unregister(bus_kset); bus_kset = NULL; pr_err("%s: failed to create and add kset 'system'\n", __func__); return -ENOMEM; } return 0; }
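As a consumer-side sketch of the subsys_interface mechanism implemented earlier in this file, a hedged example modeled on how cpufreq hooks cpu_subsys; the names are hypothetical, and add_dev is called for every device already on the bus at registration time:

static int foo_add_dev(struct device *dev, struct subsys_interface *sif)
{
	dev_info(dev, "foo interface attached\n");
	return 0;
}

static struct subsys_interface foo_interface = {
	.name = "foo",
	.subsys = &cpu_subsys,
	.add_dev = foo_add_dev,
};

static int __init foo_iface_init(void)
{
	/* walks all existing devices, then tracks future additions */
	return subsys_interface_register(&foo_interface);
}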
// SPDX-License-Identifier: GPL-2.0-only /* The industrial I/O core * * Copyright (c) 2008 Jonathan Cameron * * Handling of buffer allocation / resizing. * * Things to look at here. * - Better memory allocation techniques? * - Alternative access techniques? */ #include <linux/atomic.h> #include <linux/anon_inodes.h> #include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/device.h> #include <linux/dma-buf.h> #include <linux/dma-fence.h> #include <linux/dma-resv.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/cdev.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/poll.h> #include <linux/sched/signal.h> #include <linux/iio/iio.h> #include <linux/iio/iio-opaque.h> #include "iio_core.h" #include "iio_core_trigger.h" #include <linux/iio/sysfs.h> #include <linux/iio/buffer.h> #include <linux/iio/buffer_impl.h> #define DMABUF_ENQUEUE_TIMEOUT_MS 5000 MODULE_IMPORT_NS("DMA_BUF"); struct iio_dmabuf_priv { struct list_head entry; struct kref ref; struct iio_buffer *buffer; struct iio_dma_buffer_block *block; u64 context; /* Spinlock used for locking the dma_fence */ spinlock_t lock; struct dma_buf_attachment *attach; struct sg_table *sgt; enum dma_data_direction dir; atomic_t seqno; }; struct iio_dma_fence { struct dma_fence base; struct iio_dmabuf_priv *priv; struct work_struct work; }; static const char * const iio_endian_prefix[] = { [IIO_BE] = "be", [IIO_LE] = "le", }; static bool iio_buffer_is_active(struct iio_buffer *buf) { return !list_empty(&buf->buffer_list); } static size_t iio_buffer_data_available(struct iio_buffer *buf) { return buf->access->data_available(buf); } static int iio_buffer_flush_hwfifo(struct iio_dev *indio_dev, struct iio_buffer *buf, size_t required) { if (!indio_dev->info->hwfifo_flush_to_buffer) return -ENODEV; return indio_dev->info->hwfifo_flush_to_buffer(indio_dev, required); } static bool iio_buffer_ready(struct iio_dev *indio_dev, struct iio_buffer *buf, size_t to_wait, int to_flush) { size_t avail; int flushed = 0; /* wakeup if the device was unregistered */ if (!indio_dev->info) return true; /* drain the buffer if it was disabled */ if (!iio_buffer_is_active(buf)) { to_wait = min_t(size_t, to_wait, 1); to_flush = 0; } avail = iio_buffer_data_available(buf); if (avail >= to_wait) { /* force a flush for non-blocking reads */ if (!to_wait && avail < to_flush) iio_buffer_flush_hwfifo(indio_dev, buf, to_flush - avail); return true; } if (to_flush) flushed = iio_buffer_flush_hwfifo(indio_dev, buf, to_wait - avail); if (flushed <= 0) return false; if (avail + flushed >= to_wait) return true; return false; } /** * iio_buffer_read() - chrdev read for buffer access
* @filp: File structure pointer for the char device * @buf: Destination buffer for iio buffer read * @n: First n bytes to read * @f_ps: Long offset provided by the user as a seek position * * This function relies on all buffer implementations having an * iio_buffer as their first element. * * Return: negative values corresponding to error codes or ret != 0 * for ending the reading activity **/ static ssize_t iio_buffer_read(struct file *filp, char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; DEFINE_WAIT_FUNC(wait, woken_wake_function); size_t datum_size; size_t to_wait; int ret = 0; if (!indio_dev->info) return -ENODEV; if (!rb || !rb->access->read) return -EINVAL; if (rb->direction != IIO_BUFFER_DIRECTION_IN) return -EPERM; datum_size = rb->bytes_per_datum; /* * If datum_size is 0 there will never be anything to read from the * buffer, so signal end of file now. */ if (!datum_size) return 0; if (filp->f_flags & O_NONBLOCK) to_wait = 0; else to_wait = min_t(size_t, n / datum_size, rb->watermark); add_wait_queue(&rb->pollq, &wait); do { if (!indio_dev->info) { ret = -ENODEV; break; } if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } ret = rb->access->read(rb, n, buf); if (ret == 0 && (filp->f_flags & O_NONBLOCK)) ret = -EAGAIN; } while (ret == 0); remove_wait_queue(&rb->pollq, &wait); return ret; } static size_t iio_buffer_space_available(struct iio_buffer *buf) { if (buf->access->space_available) return buf->access->space_available(buf); return SIZE_MAX; } static ssize_t iio_buffer_write(struct file *filp, const char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; DEFINE_WAIT_FUNC(wait, woken_wake_function); int ret = 0; size_t written; if (!indio_dev->info) return -ENODEV; if (!rb || !rb->access->write) return -EINVAL; if (rb->direction != IIO_BUFFER_DIRECTION_OUT) return -EPERM; written = 0; add_wait_queue(&rb->pollq, &wait); do { if (!indio_dev->info) return -ENODEV; if (!iio_buffer_space_available(rb)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (filp->f_flags & O_NONBLOCK) { if (!written) ret = -EAGAIN; break; } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } ret = rb->access->write(rb, n - written, buf + written); if (ret < 0) break; written += ret; } while (written != n); remove_wait_queue(&rb->pollq, &wait); return ret < 0 ? 
ret : written; } /** * iio_buffer_poll() - poll the buffer to find out if it has data * @filp: File structure pointer for device access * @wait: Poll table structure pointer for which the driver adds * a wait queue * * Return: (EPOLLIN | EPOLLRDNORM) if data is available for reading * or 0 for other cases */ static __poll_t iio_buffer_poll(struct file *filp, struct poll_table_struct *wait) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; if (!indio_dev->info || !rb) return 0; poll_wait(filp, &rb->pollq, wait); switch (rb->direction) { case IIO_BUFFER_DIRECTION_IN: if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0)) return EPOLLIN | EPOLLRDNORM; break; case IIO_BUFFER_DIRECTION_OUT: if (iio_buffer_space_available(rb)) return EPOLLOUT | EPOLLWRNORM; break; } return 0; } ssize_t iio_buffer_read_wrapper(struct file *filp, char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; /* check if buffer was opened through new API */ if (test_bit(IIO_BUSY_BIT_POS, &rb->flags)) return -EBUSY; return iio_buffer_read(filp, buf, n, f_ps); } ssize_t iio_buffer_write_wrapper(struct file *filp, const char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; /* check if buffer was opened through new API */ if (test_bit(IIO_BUSY_BIT_POS, &rb->flags)) return -EBUSY; return iio_buffer_write(filp, buf, n, f_ps); } __poll_t iio_buffer_poll_wrapper(struct file *filp, struct poll_table_struct *wait) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; /* check if buffer was opened through new API */ if (test_bit(IIO_BUSY_BIT_POS, &rb->flags)) return 0; return iio_buffer_poll(filp, wait); } /** * iio_buffer_wakeup_poll - Wakes up the buffer waitqueue * @indio_dev: The IIO device * * Wakes up the event waitqueue used for poll(). Should usually * be called when the device is unregistered. 
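Seen from userspace, the fops above back the /dev/iio:deviceN character device; a hedged capture-loop sketch follows (the device path and buffer size are illustrative, and the buffer must already be enabled via sysfs):

#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

static int foo_capture(void)
{
	char buf[4096];
	struct pollfd pfd = { .events = POLLIN };
	ssize_t n;

	pfd.fd = open("/dev/iio:device0", O_RDONLY);
	if (pfd.fd < 0)
		return -1;
	/* iio_buffer_poll() reports POLLIN once watermark samples are ready */
	while (poll(&pfd, 1, -1) > 0) {
		n = read(pfd.fd, buf, sizeof(buf));
		if (n <= 0)
			break;
		/* process n bytes of raw scan data here */
	}
	close(pfd.fd);
	return 0;
}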
*/ void iio_buffer_wakeup_poll(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; unsigned int i; for (i = 0; i < iio_dev_opaque->attached_buffers_cnt; i++) { buffer = iio_dev_opaque->attached_buffers[i]; wake_up(&buffer->pollq); } } int iio_pop_from_buffer(struct iio_buffer *buffer, void *data) { if (!buffer || !buffer->access || !buffer->access->remove_from) return -EINVAL; return buffer->access->remove_from(buffer, data); } EXPORT_SYMBOL_GPL(iio_pop_from_buffer); void iio_buffer_init(struct iio_buffer *buffer) { INIT_LIST_HEAD(&buffer->demux_list); INIT_LIST_HEAD(&buffer->buffer_list); INIT_LIST_HEAD(&buffer->dmabufs); mutex_init(&buffer->dmabufs_mutex); init_waitqueue_head(&buffer->pollq); kref_init(&buffer->ref); if (!buffer->watermark) buffer->watermark = 1; } EXPORT_SYMBOL(iio_buffer_init); void iio_device_detach_buffers(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; unsigned int i; for (i = 0; i < iio_dev_opaque->attached_buffers_cnt; i++) { buffer = iio_dev_opaque->attached_buffers[i]; iio_buffer_put(buffer); } kfree(iio_dev_opaque->attached_buffers); } static ssize_t iio_show_scan_index(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", to_iio_dev_attr(attr)->c->scan_index); } static ssize_t iio_show_fixed_type(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); const struct iio_scan_type *scan_type; u8 type; scan_type = iio_get_current_scan_type(indio_dev, this_attr->c); if (IS_ERR(scan_type)) return PTR_ERR(scan_type); type = scan_type->endianness; if (type == IIO_CPU) { #ifdef __LITTLE_ENDIAN type = IIO_LE; #else type = IIO_BE; #endif } if (scan_type->repeat > 1) return sysfs_emit(buf, "%s:%c%d/%dX%d>>%u\n", iio_endian_prefix[type], scan_type->sign, scan_type->realbits, scan_type->storagebits, scan_type->repeat, scan_type->shift); else return sysfs_emit(buf, "%s:%c%d/%d>>%u\n", iio_endian_prefix[type], scan_type->sign, scan_type->realbits, scan_type->storagebits, scan_type->shift); } static ssize_t iio_scan_el_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; /* Ensure ret is 0 or 1. */ ret = !!test_bit(to_iio_dev_attr(attr)->address, buffer->scan_mask); return sysfs_emit(buf, "%d\n", ret); } /* Note: NULL is used as the error indicator, as it doesn't make sense as a valid return value. */ static const unsigned long *iio_scan_mask_match(const unsigned long *av_masks, unsigned int masklength, const unsigned long *mask, bool strict) { if (bitmap_empty(mask, masklength)) return NULL; /* * The condition here does not handle multi-long masks correctly. * It only checks the first long to be zero, and will use such a mask * as a terminator even if there were bits set after the first long. * * A correct check would require using: * while (!bitmap_empty(av_masks, masklength)) * instead. This is potentially hazardous because * available_scan_masks is a zero-terminated array of longs - and * using the proper bitmap_empty() check for multi-long wide masks * would require the array to be terminated with multiple zero longs - * which is not such a usual pattern. * * As of this writing, no multi-long wide masks were found in-tree, so * the simple while (*av_masks) check works.
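To make the zero-terminator convention above concrete, a hedged sketch of how a driver might populate available_scan_masks (the channel layout and names are hypothetical); per the comment, multi-long masks would need a different terminator scheme:

/* each entry is one allowed combination; the trailing 0 ends the array */
static const unsigned long foo_scan_masks[] = {
	BIT(0) | BIT(1),		/* X and Y only */
	BIT(0) | BIT(1) | BIT(2),	/* X, Y and Z */
	0,
};

static int foo_probe_setup(struct iio_dev *indio_dev)
{
	/* typically assigned in the driver's probe() */
	indio_dev->available_scan_masks = foo_scan_masks;
	return 0;
}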
*/ while (*av_masks) { if (strict) { if (bitmap_equal(mask, av_masks, masklength)) return av_masks; } else { if (bitmap_subset(mask, av_masks, masklength)) return av_masks; } av_masks += BITS_TO_LONGS(masklength); } return NULL; } static bool iio_validate_scan_mask(struct iio_dev *indio_dev, const unsigned long *mask) { if (!indio_dev->setup_ops->validate_scan_mask) return true; return indio_dev->setup_ops->validate_scan_mask(indio_dev, mask); } /** * iio_scan_mask_set() - set particular bit in the scan mask * @indio_dev: the iio device * @buffer: the buffer whose scan mask we are interested in * @bit: the bit to be set. * * Note that at this point we have no way of knowing what other * buffers might request, hence this code only verifies that the * individual buffer's request is plausible. */ static int iio_scan_mask_set(struct iio_dev *indio_dev, struct iio_buffer *buffer, int bit) { unsigned int masklength = iio_get_masklength(indio_dev); const unsigned long *mask; unsigned long *trialmask; if (!masklength) { WARN(1, "Trying to set scanmask prior to registering buffer\n"); return -EINVAL; } trialmask = bitmap_alloc(masklength, GFP_KERNEL); if (!trialmask) return -ENOMEM; bitmap_copy(trialmask, buffer->scan_mask, masklength); set_bit(bit, trialmask); if (!iio_validate_scan_mask(indio_dev, trialmask)) goto err_invalid_mask; if (indio_dev->available_scan_masks) { mask = iio_scan_mask_match(indio_dev->available_scan_masks, masklength, trialmask, false); if (!mask) goto err_invalid_mask; } bitmap_copy(buffer->scan_mask, trialmask, masklength); bitmap_free(trialmask); return 0; err_invalid_mask: bitmap_free(trialmask); return -EINVAL; } static int iio_scan_mask_clear(struct iio_buffer *buffer, int bit) { clear_bit(bit, buffer->scan_mask); return 0; } static int iio_scan_mask_query(struct iio_dev *indio_dev, struct iio_buffer *buffer, int bit) { if (bit > iio_get_masklength(indio_dev)) return -EINVAL; if (!buffer->scan_mask) return 0; /* Ensure return value is 0 or 1.
*/ return !!test_bit(bit, buffer->scan_mask); }; static ssize_t iio_scan_el_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; bool state; struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); struct iio_buffer *buffer = this_attr->buffer; ret = kstrtobool(buf, &state); if (ret < 0) return ret; guard(mutex)(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) return -EBUSY; ret = iio_scan_mask_query(indio_dev, buffer, this_attr->address); if (ret < 0) return ret; if (state && ret) return len; if (state) ret = iio_scan_mask_set(indio_dev, buffer, this_attr->address); else ret = iio_scan_mask_clear(buffer, this_attr->address); if (ret) return ret; return len; } static ssize_t iio_scan_el_ts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%d\n", buffer->scan_timestamp); } static ssize_t iio_scan_el_ts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; bool state; ret = kstrtobool(buf, &state); if (ret < 0) return ret; guard(mutex)(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) return -EBUSY; buffer->scan_timestamp = state; return len; } static int iio_buffer_add_channel_sysfs(struct iio_dev *indio_dev, struct iio_buffer *buffer, const struct iio_chan_spec *chan) { int ret, attrcount = 0; ret = __iio_add_chan_devattr("index", chan, &iio_show_scan_index, NULL, 0, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); if (ret) return ret; attrcount++; ret = __iio_add_chan_devattr("type", chan, &iio_show_fixed_type, NULL, 0, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); if (ret) return ret; attrcount++; if (chan->type != IIO_TIMESTAMP) ret = __iio_add_chan_devattr("en", chan, &iio_scan_el_show, &iio_scan_el_store, chan->scan_index, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); else ret = __iio_add_chan_devattr("en", chan, &iio_scan_el_ts_show, &iio_scan_el_ts_store, chan->scan_index, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); if (ret) return ret; attrcount++; ret = attrcount; return ret; } static ssize_t length_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%d\n", buffer->length); } static ssize_t length_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; unsigned int val; int ret; ret = kstrtouint(buf, 10, &val); if (ret) return ret; if (val == buffer->length) return len; guard(mutex)(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) return -EBUSY; buffer->access->set_length(buffer, val); if (buffer->length && buffer->length < buffer->watermark) buffer->watermark = buffer->length; return len; } static ssize_t enable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%d\n", 
iio_buffer_is_active(buffer)); } static int iio_storage_bytes_for_si(struct iio_dev *indio_dev, unsigned int scan_index) { const struct iio_chan_spec *ch; const struct iio_scan_type *scan_type; unsigned int bytes; ch = iio_find_channel_from_si(indio_dev, scan_index); scan_type = iio_get_current_scan_type(indio_dev, ch); if (IS_ERR(scan_type)) return PTR_ERR(scan_type); bytes = scan_type->storagebits / 8; if (scan_type->repeat > 1) bytes *= scan_type->repeat; return bytes; } static int iio_storage_bytes_for_timestamp(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_storage_bytes_for_si(indio_dev, iio_dev_opaque->scan_index_timestamp); } static int iio_compute_scan_bytes(struct iio_dev *indio_dev, const unsigned long *mask, bool timestamp) { unsigned int bytes = 0; int length, i, largest = 0; /* How much space will the demuxed element take? */ for_each_set_bit(i, mask, iio_get_masklength(indio_dev)) { length = iio_storage_bytes_for_si(indio_dev, i); if (length < 0) return length; bytes = ALIGN(bytes, length); bytes += length; largest = max(largest, length); } if (timestamp) { length = iio_storage_bytes_for_timestamp(indio_dev); if (length < 0) return length; bytes = ALIGN(bytes, length); bytes += length; largest = max(largest, length); } bytes = ALIGN(bytes, largest); return bytes; } static void iio_buffer_activate(struct iio_dev *indio_dev, struct iio_buffer *buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); iio_buffer_get(buffer); list_add(&buffer->buffer_list, &iio_dev_opaque->buffer_list); } static void iio_buffer_deactivate(struct iio_buffer *buffer) { list_del_init(&buffer->buffer_list); wake_up_interruptible(&buffer->pollq); iio_buffer_put(buffer); } static void iio_buffer_deactivate_all(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer, *_buffer; list_for_each_entry_safe(buffer, _buffer, &iio_dev_opaque->buffer_list, buffer_list) iio_buffer_deactivate(buffer); } static int iio_buffer_enable(struct iio_buffer *buffer, struct iio_dev *indio_dev) { if (!buffer->access->enable) return 0; return buffer->access->enable(buffer, indio_dev); } static int iio_buffer_disable(struct iio_buffer *buffer, struct iio_dev *indio_dev) { if (!buffer->access->disable) return 0; return buffer->access->disable(buffer, indio_dev); } static void iio_buffer_update_bytes_per_datum(struct iio_dev *indio_dev, struct iio_buffer *buffer) { unsigned int bytes; if (!buffer->access->set_bytes_per_datum) return; bytes = iio_compute_scan_bytes(indio_dev, buffer->scan_mask, buffer->scan_timestamp); buffer->access->set_bytes_per_datum(buffer, bytes); } static int iio_buffer_request_update(struct iio_dev *indio_dev, struct iio_buffer *buffer) { int ret; iio_buffer_update_bytes_per_datum(indio_dev, buffer); if (buffer->access->request_update) { ret = buffer->access->request_update(buffer); if (ret) { dev_dbg(&indio_dev->dev, "Buffer not started: buffer parameter update failed (%d)\n", ret); return ret; } } return 0; } static void iio_free_scan_mask(struct iio_dev *indio_dev, const unsigned long *mask) { /* If the mask is dynamically allocated free it, otherwise do nothing */ if (!indio_dev->available_scan_masks) bitmap_free(mask); } struct iio_device_config { unsigned int mode; unsigned int watermark; const unsigned long *scan_mask; unsigned int scan_bytes; bool scan_timestamp; }; static int iio_verify_update(struct iio_dev *indio_dev, struct iio_buffer 
*insert_buffer, struct iio_buffer *remove_buffer, struct iio_device_config *config) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); unsigned int masklength = iio_get_masklength(indio_dev); unsigned long *compound_mask; const unsigned long *scan_mask; bool strict_scanmask = false; struct iio_buffer *buffer; bool scan_timestamp; unsigned int modes; if (insert_buffer && bitmap_empty(insert_buffer->scan_mask, masklength)) { dev_dbg(&indio_dev->dev, "At least one scan element must be enabled first\n"); return -EINVAL; } memset(config, 0, sizeof(*config)); config->watermark = ~0; /* * If there is just one buffer and we are removing it there is nothing * to verify. */ if (remove_buffer && !insert_buffer && list_is_singular(&iio_dev_opaque->buffer_list)) return 0; modes = indio_dev->modes; list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { if (buffer == remove_buffer) continue; modes &= buffer->access->modes; config->watermark = min(config->watermark, buffer->watermark); } if (insert_buffer) { modes &= insert_buffer->access->modes; config->watermark = min(config->watermark, insert_buffer->watermark); } /* Definitely possible for devices to support both of these. */ if ((modes & INDIO_BUFFER_TRIGGERED) && indio_dev->trig) { config->mode = INDIO_BUFFER_TRIGGERED; } else if (modes & INDIO_BUFFER_HARDWARE) { /* * Keep things simple for now and only allow a single buffer to * be connected in hardware mode. */ if (insert_buffer && !list_empty(&iio_dev_opaque->buffer_list)) return -EINVAL; config->mode = INDIO_BUFFER_HARDWARE; strict_scanmask = true; } else if (modes & INDIO_BUFFER_SOFTWARE) { config->mode = INDIO_BUFFER_SOFTWARE; } else { /* Can only occur on first buffer */ if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) dev_dbg(&indio_dev->dev, "Buffer not started: no trigger\n"); return -EINVAL; } /* What scan mask do we actually have? 
*/ compound_mask = bitmap_zalloc(masklength, GFP_KERNEL); if (!compound_mask) return -ENOMEM; scan_timestamp = false; list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { if (buffer == remove_buffer) continue; bitmap_or(compound_mask, compound_mask, buffer->scan_mask, masklength); scan_timestamp |= buffer->scan_timestamp; } if (insert_buffer) { bitmap_or(compound_mask, compound_mask, insert_buffer->scan_mask, masklength); scan_timestamp |= insert_buffer->scan_timestamp; } if (indio_dev->available_scan_masks) { scan_mask = iio_scan_mask_match(indio_dev->available_scan_masks, masklength, compound_mask, strict_scanmask); bitmap_free(compound_mask); if (!scan_mask) return -EINVAL; } else { scan_mask = compound_mask; } config->scan_bytes = iio_compute_scan_bytes(indio_dev, scan_mask, scan_timestamp); config->scan_mask = scan_mask; config->scan_timestamp = scan_timestamp; return 0; } /** * struct iio_demux_table - table describing demux memcpy ops * @from: index to copy from * @to: index to copy to * @length: how many bytes to copy * @l: list head used for management */ struct iio_demux_table { unsigned int from; unsigned int to; unsigned int length; struct list_head l; }; static void iio_buffer_demux_free(struct iio_buffer *buffer) { struct iio_demux_table *p, *q; list_for_each_entry_safe(p, q, &buffer->demux_list, l) { list_del(&p->l); kfree(p); } } static int iio_buffer_add_demux(struct iio_buffer *buffer, struct iio_demux_table **p, unsigned int in_loc, unsigned int out_loc, unsigned int length) { if (*p && (*p)->from + (*p)->length == in_loc && (*p)->to + (*p)->length == out_loc) { (*p)->length += length; } else { *p = kmalloc(sizeof(**p), GFP_KERNEL); if (!(*p)) return -ENOMEM; (*p)->from = in_loc; (*p)->to = out_loc; (*p)->length = length; list_add_tail(&(*p)->l, &buffer->demux_list); } return 0; } static int iio_buffer_update_demux(struct iio_dev *indio_dev, struct iio_buffer *buffer) { unsigned int masklength = iio_get_masklength(indio_dev); int ret, in_ind = -1, out_ind, length; unsigned int in_loc = 0, out_loc = 0; struct iio_demux_table *p = NULL; /* Clear out any old demux */ iio_buffer_demux_free(buffer); kfree(buffer->demux_bounce); buffer->demux_bounce = NULL; /* First work out which scan mode we will actually have */ if (bitmap_equal(indio_dev->active_scan_mask, buffer->scan_mask, masklength)) return 0; /* Now we have the two masks, work from least sig and build up sizes */ for_each_set_bit(out_ind, buffer->scan_mask, masklength) { in_ind = find_next_bit(indio_dev->active_scan_mask, masklength, in_ind + 1); while (in_ind != out_ind) { ret = iio_storage_bytes_for_si(indio_dev, in_ind); if (ret < 0) goto error_clear_mux_table; length = ret; /* Make sure we are aligned */ in_loc = roundup(in_loc, length) + length; in_ind = find_next_bit(indio_dev->active_scan_mask, masklength, in_ind + 1); } ret = iio_storage_bytes_for_si(indio_dev, in_ind); if (ret < 0) goto error_clear_mux_table; length = ret; out_loc = roundup(out_loc, length); in_loc = roundup(in_loc, length); ret = iio_buffer_add_demux(buffer, &p, in_loc, out_loc, length); if (ret) goto error_clear_mux_table; out_loc += length; in_loc += length; } /* Relies on scan_timestamp being last */ if (buffer->scan_timestamp) { ret = iio_storage_bytes_for_timestamp(indio_dev); if (ret < 0) goto error_clear_mux_table; length = ret; out_loc = roundup(out_loc, length); in_loc = roundup(in_loc, length); ret = iio_buffer_add_demux(buffer, &p, in_loc, out_loc, length); if (ret) goto error_clear_mux_table; out_loc += 
length; } buffer->demux_bounce = kzalloc(out_loc, GFP_KERNEL); if (!buffer->demux_bounce) { ret = -ENOMEM; goto error_clear_mux_table; } return 0; error_clear_mux_table: iio_buffer_demux_free(buffer); return ret; } static int iio_update_demux(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; int ret; list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { ret = iio_buffer_update_demux(indio_dev, buffer); if (ret < 0) goto error_clear_mux_table; } return 0; error_clear_mux_table: list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) iio_buffer_demux_free(buffer); return ret; } static int iio_enable_buffers(struct iio_dev *indio_dev, struct iio_device_config *config) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer, *tmp = NULL; int ret; indio_dev->active_scan_mask = config->scan_mask; ACCESS_PRIVATE(indio_dev, scan_timestamp) = config->scan_timestamp; indio_dev->scan_bytes = config->scan_bytes; iio_dev_opaque->currentmode = config->mode; iio_update_demux(indio_dev); /* Wind up again */ if (indio_dev->setup_ops->preenable) { ret = indio_dev->setup_ops->preenable(indio_dev); if (ret) { dev_dbg(&indio_dev->dev, "Buffer not started: buffer preenable failed (%d)\n", ret); goto err_undo_config; } } if (indio_dev->info->update_scan_mode) { ret = indio_dev->info ->update_scan_mode(indio_dev, indio_dev->active_scan_mask); if (ret < 0) { dev_dbg(&indio_dev->dev, "Buffer not started: update scan mode failed (%d)\n", ret); goto err_run_postdisable; } } if (indio_dev->info->hwfifo_set_watermark) indio_dev->info->hwfifo_set_watermark(indio_dev, config->watermark); list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { ret = iio_buffer_enable(buffer, indio_dev); if (ret) { tmp = buffer; goto err_disable_buffers; } } if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { ret = iio_trigger_attach_poll_func(indio_dev->trig, indio_dev->pollfunc); if (ret) goto err_disable_buffers; } if (indio_dev->setup_ops->postenable) { ret = indio_dev->setup_ops->postenable(indio_dev); if (ret) { dev_dbg(&indio_dev->dev, "Buffer not started: postenable failed (%d)\n", ret); goto err_detach_pollfunc; } } return 0; err_detach_pollfunc: if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { iio_trigger_detach_poll_func(indio_dev->trig, indio_dev->pollfunc); } err_disable_buffers: buffer = list_prepare_entry(tmp, &iio_dev_opaque->buffer_list, buffer_list); list_for_each_entry_continue_reverse(buffer, &iio_dev_opaque->buffer_list, buffer_list) iio_buffer_disable(buffer, indio_dev); err_run_postdisable: if (indio_dev->setup_ops->postdisable) indio_dev->setup_ops->postdisable(indio_dev); err_undo_config: iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; indio_dev->active_scan_mask = NULL; return ret; } static int iio_disable_buffers(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; int ret = 0; int ret2; /* Wind down existing buffers - iff there are any */ if (list_empty(&iio_dev_opaque->buffer_list)) return 0; /* * If things go wrong at some step in disable we still need to continue * to perform the other steps, otherwise we leave the device in a * inconsistent state. We return the error code for the first error we * encountered. 
*/ if (indio_dev->setup_ops->predisable) { ret2 = indio_dev->setup_ops->predisable(indio_dev); if (ret2 && !ret) ret = ret2; } if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { iio_trigger_detach_poll_func(indio_dev->trig, indio_dev->pollfunc); } list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { ret2 = iio_buffer_disable(buffer, indio_dev); if (ret2 && !ret) ret = ret2; } if (indio_dev->setup_ops->postdisable) { ret2 = indio_dev->setup_ops->postdisable(indio_dev); if (ret2 && !ret) ret = ret2; } iio_free_scan_mask(indio_dev, indio_dev->active_scan_mask); indio_dev->active_scan_mask = NULL; iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; return ret; } static int __iio_update_buffers(struct iio_dev *indio_dev, struct iio_buffer *insert_buffer, struct iio_buffer *remove_buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_device_config new_config; int ret; ret = iio_verify_update(indio_dev, insert_buffer, remove_buffer, &new_config); if (ret) return ret; if (insert_buffer) { ret = iio_buffer_request_update(indio_dev, insert_buffer); if (ret) goto err_free_config; } ret = iio_disable_buffers(indio_dev); if (ret) goto err_deactivate_all; if (remove_buffer) iio_buffer_deactivate(remove_buffer); if (insert_buffer) iio_buffer_activate(indio_dev, insert_buffer); /* If no buffers in list, we are done */ if (list_empty(&iio_dev_opaque->buffer_list)) return 0; ret = iio_enable_buffers(indio_dev, &new_config); if (ret) goto err_deactivate_all; return 0; err_deactivate_all: /* * We've already verified that the config is valid earlier. If things go * wrong in either enable or disable the most likely reason is an IO * error from the device. In this case there is no good recovery * strategy. Just make sure to disable everything and leave the device * in a sane state. With a bit of luck the device might come back to * life again later and userspace can try again. 
*/ iio_buffer_deactivate_all(indio_dev); err_free_config: iio_free_scan_mask(indio_dev, new_config.scan_mask); return ret; } int iio_update_buffers(struct iio_dev *indio_dev, struct iio_buffer *insert_buffer, struct iio_buffer *remove_buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); if (insert_buffer == remove_buffer) return 0; if (insert_buffer && insert_buffer->direction == IIO_BUFFER_DIRECTION_OUT) return -EINVAL; guard(mutex)(&iio_dev_opaque->info_exist_lock); guard(mutex)(&iio_dev_opaque->mlock); if (insert_buffer && iio_buffer_is_active(insert_buffer)) insert_buffer = NULL; if (remove_buffer && !iio_buffer_is_active(remove_buffer)) remove_buffer = NULL; if (!insert_buffer && !remove_buffer) return 0; if (!indio_dev->info) return -ENODEV; return __iio_update_buffers(indio_dev, insert_buffer, remove_buffer); } EXPORT_SYMBOL_GPL(iio_update_buffers); void iio_disable_all_buffers(struct iio_dev *indio_dev) { iio_disable_buffers(indio_dev); iio_buffer_deactivate_all(indio_dev); } static ssize_t enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; bool requested_state; struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; bool inlist; ret = kstrtobool(buf, &requested_state); if (ret < 0) return ret; guard(mutex)(&iio_dev_opaque->mlock); /* Find out if it is in the list */ inlist = iio_buffer_is_active(buffer); /* Already in desired state */ if (inlist == requested_state) return len; if (requested_state) ret = __iio_update_buffers(indio_dev, buffer, NULL); else ret = __iio_update_buffers(indio_dev, NULL, buffer); if (ret) return ret; return len; } static ssize_t watermark_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%u\n", buffer->watermark); } static ssize_t watermark_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; unsigned int val; int ret; ret = kstrtouint(buf, 10, &val); if (ret) return ret; if (!val) return -EINVAL; guard(mutex)(&iio_dev_opaque->mlock); if (val > buffer->length) return -EINVAL; if (iio_buffer_is_active(buffer)) return -EBUSY; buffer->watermark = val; return len; } static ssize_t data_available_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%zu\n", iio_buffer_data_available(buffer)); } static ssize_t direction_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; switch (buffer->direction) { case IIO_BUFFER_DIRECTION_IN: return sysfs_emit(buf, "in\n"); case IIO_BUFFER_DIRECTION_OUT: return sysfs_emit(buf, "out\n"); default: return -EINVAL; } } static DEVICE_ATTR_RW(length); static struct device_attribute dev_attr_length_ro = __ATTR_RO(length); static DEVICE_ATTR_RW(enable); static DEVICE_ATTR_RW(watermark); static struct device_attribute dev_attr_watermark_ro = __ATTR_RO(watermark); static DEVICE_ATTR_RO(data_available); static DEVICE_ATTR_RO(direction); /* * When adding new attributes here, put them at the end, at least until * the code that handles the length/length_ro &
watermark/watermark_ro * assignments gets cleaned up. Otherwise these can create some weird * duplicate attributes errors under some setups. */ static struct attribute *iio_buffer_attrs[] = { &dev_attr_length.attr, &dev_attr_enable.attr, &dev_attr_watermark.attr, &dev_attr_data_available.attr, &dev_attr_direction.attr, }; #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) static struct attribute *iio_buffer_wrap_attr(struct iio_buffer *buffer, struct attribute *attr) { struct device_attribute *dattr = to_dev_attr(attr); struct iio_dev_attr *iio_attr; iio_attr = kzalloc(sizeof(*iio_attr), GFP_KERNEL); if (!iio_attr) return NULL; iio_attr->buffer = buffer; memcpy(&iio_attr->dev_attr, dattr, sizeof(iio_attr->dev_attr)); iio_attr->dev_attr.attr.name = kstrdup_const(attr->name, GFP_KERNEL); if (!iio_attr->dev_attr.attr.name) { kfree(iio_attr); return NULL; } sysfs_attr_init(&iio_attr->dev_attr.attr); list_add(&iio_attr->l, &buffer->buffer_attr_list); return &iio_attr->dev_attr.attr; } static int iio_buffer_register_legacy_sysfs_groups(struct iio_dev *indio_dev, struct attribute **buffer_attrs, int buffer_attrcount, int scan_el_attrcount) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct attribute_group *group; struct attribute **attrs; int ret; attrs = kcalloc(buffer_attrcount + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return -ENOMEM; memcpy(attrs, buffer_attrs, buffer_attrcount * sizeof(*attrs)); group = &iio_dev_opaque->legacy_buffer_group; group->attrs = attrs; group->name = "buffer"; ret = iio_device_register_sysfs_group(indio_dev, group); if (ret) goto error_free_buffer_attrs; attrs = kcalloc(scan_el_attrcount + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) { ret = -ENOMEM; goto error_free_buffer_attrs; } memcpy(attrs, &buffer_attrs[buffer_attrcount], scan_el_attrcount * sizeof(*attrs)); group = &iio_dev_opaque->legacy_scan_el_group; group->attrs = attrs; group->name = "scan_elements"; ret = iio_device_register_sysfs_group(indio_dev, group); if (ret) goto error_free_scan_el_attrs; return 0; error_free_scan_el_attrs: kfree(iio_dev_opaque->legacy_scan_el_group.attrs); error_free_buffer_attrs: kfree(iio_dev_opaque->legacy_buffer_group.attrs); return ret; } static void iio_buffer_unregister_legacy_sysfs_groups(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); kfree(iio_dev_opaque->legacy_buffer_group.attrs); kfree(iio_dev_opaque->legacy_scan_el_group.attrs); } static void iio_buffer_dmabuf_release(struct kref *ref) { struct iio_dmabuf_priv *priv = container_of(ref, struct iio_dmabuf_priv, ref); struct dma_buf_attachment *attach = priv->attach; struct iio_buffer *buffer = priv->buffer; struct dma_buf *dmabuf = attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); dma_buf_unmap_attachment(attach, priv->sgt, priv->dir); dma_resv_unlock(dmabuf->resv); buffer->access->detach_dmabuf(buffer, priv->block); dma_buf_detach(attach->dmabuf, attach); dma_buf_put(dmabuf); kfree(priv); } static void iio_buffer_dmabuf_get(struct dma_buf_attachment *attach) { struct iio_dmabuf_priv *priv = attach->importer_priv; kref_get(&priv->ref); } static void iio_buffer_dmabuf_put(struct dma_buf_attachment *attach) { struct iio_dmabuf_priv *priv = attach->importer_priv; kref_put(&priv->ref, iio_buffer_dmabuf_release); } static int iio_buffer_chrdev_release(struct inode *inode, struct file *filep) { struct iio_dev_buffer_pair *ib = filep->private_data; struct iio_dev *indio_dev = ib->indio_dev; struct iio_buffer *buffer = 
ib->buffer; struct iio_dmabuf_priv *priv, *tmp; wake_up(&buffer->pollq); guard(mutex)(&buffer->dmabufs_mutex); /* Close all attached DMABUFs */ list_for_each_entry_safe(priv, tmp, &buffer->dmabufs, entry) { list_del_init(&priv->entry); iio_buffer_dmabuf_put(priv->attach); } kfree(ib); clear_bit(IIO_BUSY_BIT_POS, &buffer->flags); iio_device_put(indio_dev); return 0; } static int iio_dma_resv_lock(struct dma_buf *dmabuf, bool nonblock) { if (!nonblock) return dma_resv_lock_interruptible(dmabuf->resv, NULL); if (!dma_resv_trylock(dmabuf->resv)) return -EBUSY; return 0; } static struct dma_buf_attachment * iio_buffer_find_attachment(struct iio_dev_buffer_pair *ib, struct dma_buf *dmabuf, bool nonblock) { struct device *dev = ib->indio_dev->dev.parent; struct iio_buffer *buffer = ib->buffer; struct dma_buf_attachment *attach = NULL; struct iio_dmabuf_priv *priv; guard(mutex)(&buffer->dmabufs_mutex); list_for_each_entry(priv, &buffer->dmabufs, entry) { if (priv->attach->dev == dev && priv->attach->dmabuf == dmabuf) { attach = priv->attach; break; } } if (attach) iio_buffer_dmabuf_get(attach); return attach ?: ERR_PTR(-EPERM); } static int iio_buffer_attach_dmabuf(struct iio_dev_buffer_pair *ib, int __user *user_fd, bool nonblock) { struct iio_dev *indio_dev = ib->indio_dev; struct iio_buffer *buffer = ib->buffer; struct dma_buf_attachment *attach; struct iio_dmabuf_priv *priv, *each; struct dma_buf *dmabuf; int err, fd; if (!buffer->access->attach_dmabuf || !buffer->access->detach_dmabuf || !buffer->access->enqueue_dmabuf) return -EPERM; if (copy_from_user(&fd, user_fd, sizeof(fd))) return -EFAULT; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; spin_lock_init(&priv->lock); priv->context = dma_fence_context_alloc(1); dmabuf = dma_buf_get(fd); if (IS_ERR(dmabuf)) { err = PTR_ERR(dmabuf); goto err_free_priv; } attach = dma_buf_attach(dmabuf, indio_dev->dev.parent); if (IS_ERR(attach)) { err = PTR_ERR(attach); goto err_dmabuf_put; } err = iio_dma_resv_lock(dmabuf, nonblock); if (err) goto err_dmabuf_detach; priv->dir = buffer->direction == IIO_BUFFER_DIRECTION_IN ? DMA_FROM_DEVICE : DMA_TO_DEVICE; priv->sgt = dma_buf_map_attachment(attach, priv->dir); if (IS_ERR(priv->sgt)) { err = PTR_ERR(priv->sgt); dev_err(&indio_dev->dev, "Unable to map attachment: %d\n", err); goto err_resv_unlock; } kref_init(&priv->ref); priv->buffer = buffer; priv->attach = attach; attach->importer_priv = priv; priv->block = buffer->access->attach_dmabuf(buffer, attach); if (IS_ERR(priv->block)) { err = PTR_ERR(priv->block); goto err_dmabuf_unmap_attachment; } dma_resv_unlock(dmabuf->resv); mutex_lock(&buffer->dmabufs_mutex); /* * Check whether we already have an attachment for this driver/DMABUF * combo. If we do, refuse to attach. */ list_for_each_entry(each, &buffer->dmabufs, entry) { if (each->attach->dev == indio_dev->dev.parent && each->attach->dmabuf == dmabuf) { /* * We unlocked the reservation object, so going through * the cleanup code would mean re-locking it first. * At this stage it is simpler to free the attachment * using iio_buffer_dmabuf_put(). */ mutex_unlock(&buffer->dmabufs_mutex); iio_buffer_dmabuf_put(attach); return -EBUSY; } } /* Otherwise, add the new attachment to our dmabufs list.
*/ list_add(&priv->entry, &buffer->dmabufs); mutex_unlock(&buffer->dmabufs_mutex); return 0; err_dmabuf_unmap_attachment: dma_buf_unmap_attachment(attach, priv->sgt, priv->dir); err_resv_unlock: dma_resv_unlock(dmabuf->resv); err_dmabuf_detach: dma_buf_detach(dmabuf, attach); err_dmabuf_put: dma_buf_put(dmabuf); err_free_priv: kfree(priv); return err; } static int iio_buffer_detach_dmabuf(struct iio_dev_buffer_pair *ib, int __user *user_req, bool nonblock) { struct iio_buffer *buffer = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; struct iio_dmabuf_priv *priv; struct dma_buf *dmabuf; int dmabuf_fd, ret = -EPERM; if (copy_from_user(&dmabuf_fd, user_req, sizeof(dmabuf_fd))) return -EFAULT; dmabuf = dma_buf_get(dmabuf_fd); if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf); guard(mutex)(&buffer->dmabufs_mutex); list_for_each_entry(priv, &buffer->dmabufs, entry) { if (priv->attach->dev == indio_dev->dev.parent && priv->attach->dmabuf == dmabuf) { list_del(&priv->entry); /* Unref the reference from iio_buffer_attach_dmabuf() */ iio_buffer_dmabuf_put(priv->attach); ret = 0; break; } } dma_buf_put(dmabuf); return ret; } static const char * iio_buffer_dma_fence_get_driver_name(struct dma_fence *fence) { return "iio"; } static void iio_buffer_dma_fence_release(struct dma_fence *fence) { struct iio_dma_fence *iio_fence = container_of(fence, struct iio_dma_fence, base); kfree(iio_fence); } static const struct dma_fence_ops iio_buffer_dma_fence_ops = { .get_driver_name = iio_buffer_dma_fence_get_driver_name, .get_timeline_name = iio_buffer_dma_fence_get_driver_name, .release = iio_buffer_dma_fence_release, }; static int iio_buffer_enqueue_dmabuf(struct iio_dev_buffer_pair *ib, struct iio_dmabuf __user *iio_dmabuf_req, bool nonblock) { struct iio_buffer *buffer = ib->buffer; struct iio_dmabuf iio_dmabuf; struct dma_buf_attachment *attach; struct iio_dmabuf_priv *priv; struct iio_dma_fence *fence; struct dma_buf *dmabuf; unsigned long timeout; bool cookie, cyclic, dma_to_ram; long retl; u32 seqno; int ret; if (copy_from_user(&iio_dmabuf, iio_dmabuf_req, sizeof(iio_dmabuf))) return -EFAULT; if (iio_dmabuf.flags & ~IIO_BUFFER_DMABUF_SUPPORTED_FLAGS) return -EINVAL; cyclic = iio_dmabuf.flags & IIO_BUFFER_DMABUF_CYCLIC; /* Cyclic flag is only supported on output buffers */ if (cyclic && buffer->direction != IIO_BUFFER_DIRECTION_OUT) return -EINVAL; dmabuf = dma_buf_get(iio_dmabuf.fd); if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf); if (!iio_dmabuf.bytes_used || iio_dmabuf.bytes_used > dmabuf->size) { ret = -EINVAL; goto err_dmabuf_put; } attach = iio_buffer_find_attachment(ib, dmabuf, nonblock); if (IS_ERR(attach)) { ret = PTR_ERR(attach); goto err_dmabuf_put; } priv = attach->importer_priv; fence = kmalloc(sizeof(*fence), GFP_KERNEL); if (!fence) { ret = -ENOMEM; goto err_attachment_put; } fence->priv = priv; seqno = atomic_add_return(1, &priv->seqno); /* * The transfers are guaranteed to be processed in the order they are * enqueued, so we can use a simple incrementing sequence number for * the dma_fence. */ dma_fence_init(&fence->base, &iio_buffer_dma_fence_ops, &priv->lock, priv->context, seqno); ret = iio_dma_resv_lock(dmabuf, nonblock); if (ret) goto err_fence_put; timeout = nonblock ? 
0 : msecs_to_jiffies(DMABUF_ENQUEUE_TIMEOUT_MS); dma_to_ram = buffer->direction == IIO_BUFFER_DIRECTION_IN; /* Make sure we don't have writers */ retl = dma_resv_wait_timeout(dmabuf->resv, dma_resv_usage_rw(dma_to_ram), true, timeout); if (retl == 0) retl = -EBUSY; if (retl < 0) { ret = (int)retl; goto err_resv_unlock; } if (buffer->access->lock_queue) buffer->access->lock_queue(buffer); ret = dma_resv_reserve_fences(dmabuf->resv, 1); if (ret) goto err_queue_unlock; dma_resv_add_fence(dmabuf->resv, &fence->base, dma_to_ram ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); dma_resv_unlock(dmabuf->resv); cookie = dma_fence_begin_signalling(); ret = buffer->access->enqueue_dmabuf(buffer, priv->block, &fence->base, priv->sgt, iio_dmabuf.bytes_used, cyclic); if (ret) { /* * DMABUF enqueue failed, but we already added the fence. * Signal the error through the fence completion mechanism. */ iio_buffer_signal_dmabuf_done(&fence->base, ret); } if (buffer->access->unlock_queue) buffer->access->unlock_queue(buffer); dma_fence_end_signalling(cookie); dma_buf_put(dmabuf); return ret; err_queue_unlock: if (buffer->access->unlock_queue) buffer->access->unlock_queue(buffer); err_resv_unlock: dma_resv_unlock(dmabuf->resv); err_fence_put: dma_fence_put(&fence->base); err_attachment_put: iio_buffer_dmabuf_put(attach); err_dmabuf_put: dma_buf_put(dmabuf); return ret; } static void iio_buffer_cleanup(struct work_struct *work) { struct iio_dma_fence *fence = container_of(work, struct iio_dma_fence, work); struct iio_dmabuf_priv *priv = fence->priv; struct dma_buf_attachment *attach = priv->attach; dma_fence_put(&fence->base); iio_buffer_dmabuf_put(attach); } void iio_buffer_signal_dmabuf_done(struct dma_fence *fence, int ret) { struct iio_dma_fence *iio_fence = container_of(fence, struct iio_dma_fence, base); bool cookie = dma_fence_begin_signalling(); /* * Get a reference to the fence, so that it's not freed as soon as * it's signaled. */ dma_fence_get(fence); fence->error = ret; dma_fence_signal(fence); dma_fence_end_signalling(cookie); /* * The fence will be unref'd in iio_buffer_cleanup. * It can't be done here, as the unref functions might try to lock the * resv object, which can deadlock. 
*/ INIT_WORK(&iio_fence->work, iio_buffer_cleanup); schedule_work(&iio_fence->work); } EXPORT_SYMBOL_GPL(iio_buffer_signal_dmabuf_done); static long iio_buffer_chrdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct iio_dev_buffer_pair *ib = filp->private_data; void __user *_arg = (void __user *)arg; bool nonblock = filp->f_flags & O_NONBLOCK; switch (cmd) { case IIO_BUFFER_DMABUF_ATTACH_IOCTL: return iio_buffer_attach_dmabuf(ib, _arg, nonblock); case IIO_BUFFER_DMABUF_DETACH_IOCTL: return iio_buffer_detach_dmabuf(ib, _arg, nonblock); case IIO_BUFFER_DMABUF_ENQUEUE_IOCTL: return iio_buffer_enqueue_dmabuf(ib, _arg, nonblock); default: return -EINVAL; } } static const struct file_operations iio_buffer_chrdev_fileops = { .owner = THIS_MODULE, .llseek = noop_llseek, .read = iio_buffer_read, .write = iio_buffer_write, .unlocked_ioctl = iio_buffer_chrdev_ioctl, .compat_ioctl = compat_ptr_ioctl, .poll = iio_buffer_poll, .release = iio_buffer_chrdev_release, }; static long iio_device_buffer_getfd(struct iio_dev *indio_dev, unsigned long arg) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int __user *ival = (int __user *)arg; struct iio_dev_buffer_pair *ib; struct iio_buffer *buffer; int fd, idx, ret; if (copy_from_user(&idx, ival, sizeof(idx))) return -EFAULT; if (idx >= iio_dev_opaque->attached_buffers_cnt) return -ENODEV; iio_device_get(indio_dev); buffer = iio_dev_opaque->attached_buffers[idx]; if (test_and_set_bit(IIO_BUSY_BIT_POS, &buffer->flags)) { ret = -EBUSY; goto error_iio_dev_put; } ib = kzalloc(sizeof(*ib), GFP_KERNEL); if (!ib) { ret = -ENOMEM; goto error_clear_busy_bit; } ib->indio_dev = indio_dev; ib->buffer = buffer; fd = anon_inode_getfd("iio:buffer", &iio_buffer_chrdev_fileops, ib, O_RDWR | O_CLOEXEC); if (fd < 0) { ret = fd; goto error_free_ib; } if (copy_to_user(ival, &fd, sizeof(fd))) { /* * "Leak" the fd, as there's not much we can do about this * anyway. 'fd' might have been closed already, as * anon_inode_getfd() called fd_install() on it, which made * it reachable by userland. * * Instead of allowing a malicious user to play tricks with * us, rely on the process exit path to do any necessary * cleanup, as in releasing the file, if still needed. 
*/ return -EFAULT; } return 0; error_free_ib: kfree(ib); error_clear_busy_bit: clear_bit(IIO_BUSY_BIT_POS, &buffer->flags); error_iio_dev_put: iio_device_put(indio_dev); return ret; } static long iio_device_buffer_ioctl(struct iio_dev *indio_dev, struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IIO_BUFFER_GET_FD_IOCTL: return iio_device_buffer_getfd(indio_dev, arg); default: return IIO_IOCTL_UNHANDLED; } } static int iio_channel_validate_scan_type(struct device *dev, int ch, const struct iio_scan_type *scan_type) { /* Verify that sample bits fit into storage */ if (scan_type->storagebits < scan_type->realbits + scan_type->shift) { dev_err(dev, "Channel %d storagebits (%d) < shifted realbits (%d + %d)\n", ch, scan_type->storagebits, scan_type->realbits, scan_type->shift); return -EINVAL; } return 0; } static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, struct iio_dev *indio_dev, int index) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); unsigned int masklength = iio_get_masklength(indio_dev); struct iio_dev_attr *p; const struct iio_dev_attr *id_attr; struct attribute **attr; int ret, i, attrn, scan_el_attrcount, buffer_attrcount; const struct iio_chan_spec *channels; buffer_attrcount = 0; if (buffer->attrs) { while (buffer->attrs[buffer_attrcount]) buffer_attrcount++; } buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs); scan_el_attrcount = 0; INIT_LIST_HEAD(&buffer->buffer_attr_list); channels = indio_dev->channels; if (channels) { /* new magic */ for (i = 0; i < indio_dev->num_channels; i++) { const struct iio_scan_type *scan_type; if (channels[i].scan_index < 0) continue; if (channels[i].has_ext_scan_type) { int j; /* * get_current_scan_type is required when using * extended scan types. 
*/ if (!indio_dev->info->get_current_scan_type) { ret = -EINVAL; goto error_cleanup_dynamic; } for (j = 0; j < channels[i].num_ext_scan_type; j++) { scan_type = &channels[i].ext_scan_type[j]; ret = iio_channel_validate_scan_type( &indio_dev->dev, i, scan_type); if (ret) goto error_cleanup_dynamic; } } else { scan_type = &channels[i].scan_type; ret = iio_channel_validate_scan_type( &indio_dev->dev, i, scan_type); if (ret) goto error_cleanup_dynamic; } ret = iio_buffer_add_channel_sysfs(indio_dev, buffer, &channels[i]); if (ret < 0) goto error_cleanup_dynamic; scan_el_attrcount += ret; if (channels[i].type == IIO_TIMESTAMP) iio_dev_opaque->scan_index_timestamp = channels[i].scan_index; } if (masklength && !buffer->scan_mask) { buffer->scan_mask = bitmap_zalloc(masklength, GFP_KERNEL); if (!buffer->scan_mask) { ret = -ENOMEM; goto error_cleanup_dynamic; } } } attrn = buffer_attrcount + scan_el_attrcount; attr = kcalloc(attrn + 1, sizeof(*attr), GFP_KERNEL); if (!attr) { ret = -ENOMEM; goto error_free_scan_mask; } memcpy(attr, iio_buffer_attrs, sizeof(iio_buffer_attrs)); if (!buffer->access->set_length) attr[0] = &dev_attr_length_ro.attr; if (buffer->access->flags & INDIO_BUFFER_FLAG_FIXED_WATERMARK) attr[2] = &dev_attr_watermark_ro.attr; if (buffer->attrs) for (i = 0, id_attr = buffer->attrs[i]; (id_attr = buffer->attrs[i]); i++) attr[ARRAY_SIZE(iio_buffer_attrs) + i] = (struct attribute *)&id_attr->dev_attr.attr; buffer->buffer_group.attrs = attr; for (i = 0; i < buffer_attrcount; i++) { struct attribute *wrapped; wrapped = iio_buffer_wrap_attr(buffer, attr[i]); if (!wrapped) { ret = -ENOMEM; goto error_free_buffer_attrs; } attr[i] = wrapped; } attrn = 0; list_for_each_entry(p, &buffer->buffer_attr_list, l) attr[attrn++] = &p->dev_attr.attr; buffer->buffer_group.name = kasprintf(GFP_KERNEL, "buffer%d", index); if (!buffer->buffer_group.name) { ret = -ENOMEM; goto error_free_buffer_attrs; } ret = iio_device_register_sysfs_group(indio_dev, &buffer->buffer_group); if (ret) goto error_free_buffer_attr_group_name; /* we only need to register the legacy groups for the first buffer */ if (index > 0) return 0; ret = iio_buffer_register_legacy_sysfs_groups(indio_dev, attr, buffer_attrcount, scan_el_attrcount); if (ret) goto error_free_buffer_attr_group_name; return 0; error_free_buffer_attr_group_name: kfree(buffer->buffer_group.name); error_free_buffer_attrs: kfree(buffer->buffer_group.attrs); error_free_scan_mask: bitmap_free(buffer->scan_mask); error_cleanup_dynamic: iio_free_chan_devattr_list(&buffer->buffer_attr_list); return ret; } static void __iio_buffer_free_sysfs_and_mask(struct iio_buffer *buffer, struct iio_dev *indio_dev, int index) { if (index == 0) iio_buffer_unregister_legacy_sysfs_groups(indio_dev); bitmap_free(buffer->scan_mask); kfree(buffer->buffer_group.name); kfree(buffer->buffer_group.attrs); iio_free_chan_devattr_list(&buffer->buffer_attr_list); } int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); const struct iio_chan_spec *channels; struct iio_buffer *buffer; int ret, i, idx; size_t sz; channels = indio_dev->channels; if (channels) { int ml = 0; for (i = 0; i < indio_dev->num_channels; i++) ml = max(ml, channels[i].scan_index + 1); ACCESS_PRIVATE(indio_dev, masklength) = ml; } if (!iio_dev_opaque->attached_buffers_cnt) return 0; for (idx = 0; idx < iio_dev_opaque->attached_buffers_cnt; idx++) { buffer = iio_dev_opaque->attached_buffers[idx]; ret = __iio_buffer_alloc_sysfs_and_mask(buffer, 
indio_dev, idx); if (ret) goto error_unwind_sysfs_and_mask; } sz = sizeof(*iio_dev_opaque->buffer_ioctl_handler); iio_dev_opaque->buffer_ioctl_handler = kzalloc(sz, GFP_KERNEL); if (!iio_dev_opaque->buffer_ioctl_handler) { ret = -ENOMEM; goto error_unwind_sysfs_and_mask; } iio_dev_opaque->buffer_ioctl_handler->ioctl = iio_device_buffer_ioctl; iio_device_ioctl_handler_register(indio_dev, iio_dev_opaque->buffer_ioctl_handler); return 0; error_unwind_sysfs_and_mask: while (idx--) { buffer = iio_dev_opaque->attached_buffers[idx]; __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, idx); } return ret; } void iio_buffers_free_sysfs_and_mask(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; int i; if (!iio_dev_opaque->attached_buffers_cnt) return; iio_device_ioctl_handler_unregister(iio_dev_opaque->buffer_ioctl_handler); kfree(iio_dev_opaque->buffer_ioctl_handler); for (i = iio_dev_opaque->attached_buffers_cnt - 1; i >= 0; i--) { buffer = iio_dev_opaque->attached_buffers[i]; __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, i); } } /** * iio_validate_scan_mask_onehot() - Validates that exactly one channel is selected * @indio_dev: the iio device * @mask: scan mask to be checked * * Return true if exactly one bit is set in the scan mask, false otherwise. It * can be used for devices where only one channel can be active for sampling at * a time. */ bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, const unsigned long *mask) { return bitmap_weight(mask, iio_get_masklength(indio_dev)) == 1; } EXPORT_SYMBOL_GPL(iio_validate_scan_mask_onehot); static const void *iio_demux(struct iio_buffer *buffer, const void *datain) { struct iio_demux_table *t; if (list_empty(&buffer->demux_list)) return datain; list_for_each_entry(t, &buffer->demux_list, l) memcpy(buffer->demux_bounce + t->to, datain + t->from, t->length); return buffer->demux_bounce; } static int iio_push_to_buffer(struct iio_buffer *buffer, const void *data) { const void *dataout = iio_demux(buffer, data); int ret; ret = buffer->access->store_to(buffer, dataout); if (ret) return ret; /* * We can't just test for watermark to decide if we wake the poll queue * because read may request less samples than the watermark. */ wake_up_interruptible_poll(&buffer->pollq, EPOLLIN | EPOLLRDNORM); return 0; } /** * iio_push_to_buffers() - push to a registered buffer. * @indio_dev: iio_dev structure for device. * @data: Full scan. */ int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int ret; struct iio_buffer *buf; list_for_each_entry(buf, &iio_dev_opaque->buffer_list, buffer_list) { ret = iio_push_to_buffer(buf, data); if (ret < 0) return ret; } return 0; } EXPORT_SYMBOL_GPL(iio_push_to_buffers); /** * iio_push_to_buffers_with_ts_unaligned() - push to registered buffer, * no alignment or space requirements. * @indio_dev: iio_dev structure for device. * @data: channel data excluding the timestamp. * @data_sz: size of data. * @timestamp: timestamp for the sample data. * * This special variant of iio_push_to_buffers_with_timestamp() does * not require space for the timestamp, or 8 byte alignment of data. * It does however require an allocation on first call and additional * copies on all calls, so should be avoided if possible. 
*/ int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev, const void *data, size_t data_sz, int64_t timestamp) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); /* * Conservative estimate - we can always safely copy the minimum * of either the data provided or the length of the destination buffer. * This relaxed limit allows the calling drivers to be lax about * tracking the size of the data they are pushing, at the cost of * unnecessary copying of padding. */ data_sz = min_t(size_t, indio_dev->scan_bytes, data_sz); if (iio_dev_opaque->bounce_buffer_size != indio_dev->scan_bytes) { void *bb; bb = devm_krealloc(&indio_dev->dev, iio_dev_opaque->bounce_buffer, indio_dev->scan_bytes, GFP_KERNEL); if (!bb) return -ENOMEM; iio_dev_opaque->bounce_buffer = bb; iio_dev_opaque->bounce_buffer_size = indio_dev->scan_bytes; } memcpy(iio_dev_opaque->bounce_buffer, data, data_sz); return iio_push_to_buffers_with_timestamp(indio_dev, iio_dev_opaque->bounce_buffer, timestamp); } EXPORT_SYMBOL_GPL(iio_push_to_buffers_with_ts_unaligned); /** * iio_buffer_release() - Free a buffer's resources * @ref: Pointer to the kref embedded in the iio_buffer struct * * This function is called when the last reference to the buffer has been * dropped. It will typically free all resources allocated by the buffer. Do not * call this function manually; always use iio_buffer_put() when done using a * buffer. */ static void iio_buffer_release(struct kref *ref) { struct iio_buffer *buffer = container_of(ref, struct iio_buffer, ref); mutex_destroy(&buffer->dmabufs_mutex); buffer->access->release(buffer); } /** * iio_buffer_get() - Grab a reference to the buffer * @buffer: The buffer to grab a reference for, may be NULL * * Returns the pointer to the buffer that was passed into the function. */ struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer) { if (buffer) kref_get(&buffer->ref); return buffer; } EXPORT_SYMBOL_GPL(iio_buffer_get); /** * iio_buffer_put() - Release the reference to the buffer * @buffer: The buffer to release the reference for, may be NULL */ void iio_buffer_put(struct iio_buffer *buffer) { if (buffer) kref_put(&buffer->ref, iio_buffer_release); } EXPORT_SYMBOL_GPL(iio_buffer_put); /** * iio_device_attach_buffer - Attach a buffer to an IIO device * @indio_dev: The device the buffer should be attached to * @buffer: The buffer to attach to the device * * Return 0 if successful, negative on error. * * This function attaches a buffer to an IIO device. The buffer stays attached to * the device until the device is freed. For legacy reasons, the first attached * buffer will also be assigned to 'indio_dev->buffer'. * The array allocated here will be freed via the iio_device_detach_buffers() * call, which is handled by iio_device_free(). */ int iio_device_attach_buffer(struct iio_dev *indio_dev, struct iio_buffer *buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer **new, **old = iio_dev_opaque->attached_buffers; unsigned int cnt = iio_dev_opaque->attached_buffers_cnt; cnt++; new = krealloc(old, sizeof(*new) * cnt, GFP_KERNEL); if (!new) return -ENOMEM; iio_dev_opaque->attached_buffers = new; buffer = iio_buffer_get(buffer); /* first buffer is legacy; attach it to the IIO device directly */ if (!indio_dev->buffer) indio_dev->buffer = buffer; iio_dev_opaque->attached_buffers[cnt - 1] = buffer; iio_dev_opaque->attached_buffers_cnt = cnt; return 0; } EXPORT_SYMBOL_GPL(iio_device_attach_buffer);
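The chrdev path above is consumed from userspace by requesting a per-buffer fd with IIO_BUFFER_GET_FD_IOCTL and then read()ing scan data from it. What follows is a minimal, hedged sketch of that contract as implemented by iio_device_buffer_getfd() (the index is read from the int argument, and the new fd is written back through the same pointer); the device node path and buffer index are assumptions for illustration, and the buffer is assumed to have been enabled via sysfs beforehand.

/*
 * Userspace sketch, not part of the driver. Assumes /dev/iio:device0
 * exists and buffer 0 is attached and enabled.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/iio/buffer.h>	/* IIO_BUFFER_GET_FD_IOCTL */

int main(void)
{
	char scan[4096];
	int dev_fd, arg = 0;	/* index into attached_buffers[]; 0 assumed */
	ssize_t n;

	dev_fd = open("/dev/iio:device0", O_RDONLY);	/* assumed node */
	if (dev_fd < 0)
		return 1;

	/* The kernel reads the index from 'arg' and writes the new fd back. */
	if (ioctl(dev_fd, IIO_BUFFER_GET_FD_IOCTL, &arg) < 0) {
		close(dev_fd);
		return 1;
	}

	/* 'arg' now holds an anon-inode fd bound to that buffer. */
	n = read(arg, scan, sizeof(scan));
	printf("read %zd bytes of scan data\n", n);

	close(arg);
	close(dev_fd);
	return 0;
}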
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_RTNETLINK_H #define __LINUX_RTNETLINK_H #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo) { return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo); } extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; #define ASSERT_RTNL() \ WARN_ONCE(!rtnl_is_locked(), \ "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else static inline bool lockdep_rtnl_is_held(void) { return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** * rcu_dereference_rtnl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check that the caller either holds * rcu_read_lock() or RTNL.
Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * * Return: the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) /** * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning * its old value * @rp: RCU pointer, whose value is returned * @p: regular pointer * * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated * pointer. The old value of @rp is returned, and @rp is set to @p */ #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) #ifdef CONFIG_DEBUG_NET_SMALL_RTNL void __rtnl_net_lock(struct net *net); void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_trylock(struct net *net); int rtnl_net_lock_killable(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); #define ASSERT_RTNL_NET(net) \ WARN_ONCE(!rtnl_net_is_locked(net), \ "RTNL_NET: assertion failed at %s (%d)\n", \ __FILE__, __LINE__) bool lockdep_rtnl_net_is_held(struct net *net); #define rcu_dereference_rtnl_net(net, p) \ rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) #define rtnl_net_dereference(net, p) \ rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) #define rcu_replace_pointer_rtnl_net(net, rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) #else static inline void __rtnl_net_lock(struct net *net) {} static inline void __rtnl_net_unlock(struct net *net) {} static inline void rtnl_net_lock(struct net *net) { rtnl_lock(); } static inline void rtnl_net_unlock(struct net *net) { rtnl_unlock(); } static inline int rtnl_net_trylock(struct net *net) { return rtnl_trylock(); } static inline int rtnl_net_lock_killable(struct net *net) { return rtnl_lock_killable(); } static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); } #define rcu_dereference_rtnl_net(net, p) \ rcu_dereference_rtnl(p) #define rtnl_net_dereference(net, p) \ rtnl_dereference(p) #define rcu_replace_pointer_rtnl_net(net, rp, p) \ rcu_replace_pointer_rtnl(rp, p) #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); } static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) { return rcu_dereference(dev->ingress_queue); } struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); /* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. 
*/ struct ndo_fdb_dump_context { unsigned long ifindex; unsigned long fdb_idx; }; extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags); extern int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); extern void rtnl_offload_xstats_notify(struct net_device *dev); static inline int rtnl_has_listeners(const struct net *net, u32 group) { struct sock *rtnl = net->rtnl; return netlink_has_listeners(rtnl, group); } /** * rtnl_notify_needed - check if notification is needed * @net: Pointer to the net namespace * @nlflags: netlink ingress message flags * @group: rtnl group * * Based on the ingress message flags and rtnl group, returns true * if a notification is needed, false otherwise. */ static inline bool rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) { return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } void netdev_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */
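To make the locking contract of the dereference helpers above concrete, here is a hedged kernel-module sketch; every identifier in it (demo_cfg, demo_ptr, demo_read, demo_update) is invented for illustration. Readers may satisfy the check with either rcu_read_lock() or RTNL via rcu_dereference_rtnl(), while an updater that holds RTNL uses rtnl_dereference() and therefore skips the READ_ONCE().

// Minimal sketch, assuming an in-tree module build environment.
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>

struct demo_cfg {			/* hypothetical RCU-protected config */
	int val;
	struct rcu_head rcu;
};

static struct demo_cfg __rcu *demo_ptr;

static int demo_read(void)
{
	struct demo_cfg *cfg;
	int val = -1;

	rcu_read_lock();
	/* Reader: rcu_read_lock() (or RTNL) satisfies the lockdep check. */
	cfg = rcu_dereference_rtnl(demo_ptr);
	if (cfg)
		val = cfg->val;
	rcu_read_unlock();
	return val;
}

static void demo_update(int new_val)
{
	struct demo_cfg *new_cfg, *old;

	new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
	if (!new_cfg)
		return;
	new_cfg->val = new_val;

	rtnl_lock();
	/* Updater: RTNL held, so rtnl_dereference() omits the READ_ONCE(). */
	old = rtnl_dereference(demo_ptr);
	rcu_assign_pointer(demo_ptr, new_cfg);
	rtnl_unlock();

	if (old)
		kfree_rcu(old, rcu);	/* free after a grace period */
}

static int __init demo_init(void)
{
	demo_update(42);
	return demo_read() == 42 ? 0 : -EINVAL;
}

static void __exit demo_exit(void)
{
	struct demo_cfg *old;

	rtnl_lock();
	old = rtnl_dereference(demo_ptr);
	RCU_INIT_POINTER(demo_ptr, NULL);
	rtnl_unlock();
	if (old)
		kfree_rcu(old, rcu);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_DESCRIPTION("rtnl_dereference()/rcu_dereference_rtnl() usage sketch");
MODULE_LICENSE("GPL");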
// SPDX-License-Identifier: GPL-2.0-or-later /* * Mirics MSi001 silicon tuner driver * * Copyright (C) 2013 Antti Palosaari <crope@iki.fi> * Copyright (C) 2014 Antti Palosaari <crope@iki.fi> */ #include <linux/module.h> #include <linux/gcd.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> static const struct v4l2_frequency_band bands[] = { { .type = V4L2_TUNER_RF, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 49000000, .rangehigh = 263000000, }, { .type = V4L2_TUNER_RF, .index = 1, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 390000000, .rangehigh = 960000000, }, }; struct msi001_dev { struct spi_device *spi; struct v4l2_subdev sd; /* Controls */ struct v4l2_ctrl_handler hdl; struct v4l2_ctrl *bandwidth_auto; struct v4l2_ctrl *bandwidth; struct v4l2_ctrl *lna_gain; struct v4l2_ctrl *mixer_gain; struct v4l2_ctrl *if_gain; unsigned int f_tuner; }; static inline struct msi001_dev *sd_to_msi001_dev(struct v4l2_subdev *sd) { return container_of(sd, struct msi001_dev, sd); } static int msi001_wreg(struct msi001_dev *dev, u32 data) { /* Register format: 4 bits addr + 20 bits value */ return spi_write(dev->spi, &data, 3); } static int msi001_set_gain(struct msi001_dev *dev, int lna_gain, int mixer_gain, int if_gain) { struct spi_device *spi = dev->spi; int ret; u32 reg; dev_dbg(&spi->dev, "lna=%d mixer=%d if=%d\n", lna_gain, mixer_gain, if_gain); reg = 1 << 0; reg |= (59 - if_gain) << 4; reg |= 0 << 10; reg |= (1 - mixer_gain) << 12; reg |= (1 - lna_gain) << 13; reg |= 4 << 14; reg |= 0 << 17; ret =
msi001_wreg(dev, reg); if (ret) goto err; return 0; err: dev_dbg(&spi->dev, "failed %d\n", ret); return ret; }; static int msi001_set_tuner(struct msi001_dev *dev) { struct spi_device *spi = dev->spi; int ret, i; unsigned int uitmp, div_n, k, k_thresh, k_frac, div_lo, f_if1; u32 reg; u64 f_vco; u8 mode, filter_mode; static const struct { u32 rf; u8 mode; u8 div_lo; } band_lut[] = { { 50000000, 0xe1, 16}, /* AM_MODE2, antenna 2 */ {108000000, 0x42, 32}, /* VHF_MODE */ {330000000, 0x44, 16}, /* B3_MODE */ {960000000, 0x48, 4}, /* B45_MODE */ { ~0U, 0x50, 2}, /* BL_MODE */ }; static const struct { u32 freq; u8 filter_mode; } if_freq_lut[] = { { 0, 0x03}, /* Zero IF */ { 450000, 0x02}, /* 450 kHz IF */ {1620000, 0x01}, /* 1.62 MHz IF */ {2048000, 0x00}, /* 2.048 MHz IF */ }; static const struct { u32 freq; u8 val; } bandwidth_lut[] = { { 200000, 0x00}, /* 200 kHz */ { 300000, 0x01}, /* 300 kHz */ { 600000, 0x02}, /* 600 kHz */ {1536000, 0x03}, /* 1.536 MHz */ {5000000, 0x04}, /* 5 MHz */ {6000000, 0x05}, /* 6 MHz */ {7000000, 0x06}, /* 7 MHz */ {8000000, 0x07}, /* 8 MHz */ }; unsigned int f_rf = dev->f_tuner; /* * bandwidth (Hz) * 200000, 300000, 600000, 1536000, 5000000, 6000000, 7000000, 8000000 */ unsigned int bandwidth; /* * intermediate frequency (Hz) * 0, 450000, 1620000, 2048000 */ unsigned int f_if = 0; #define F_REF 24000000 #define DIV_PRE_N 4 #define F_VCO_STEP div_lo dev_dbg(&spi->dev, "f_rf=%d f_if=%d\n", f_rf, f_if); for (i = 0; i < ARRAY_SIZE(band_lut); i++) { if (f_rf <= band_lut[i].rf) { mode = band_lut[i].mode; div_lo = band_lut[i].div_lo; break; } } if (i == ARRAY_SIZE(band_lut)) { ret = -EINVAL; goto err; } /* AM_MODE is upconverted */ if ((mode >> 0) & 0x1) f_if1 = 5 * F_REF; else f_if1 = 0; for (i = 0; i < ARRAY_SIZE(if_freq_lut); i++) { if (f_if == if_freq_lut[i].freq) { filter_mode = if_freq_lut[i].filter_mode; break; } } if (i == ARRAY_SIZE(if_freq_lut)) { ret = -EINVAL; goto err; } /* filters */ bandwidth = dev->bandwidth->val; bandwidth = clamp(bandwidth, 200000U, 8000000U); for (i = 0; i < ARRAY_SIZE(bandwidth_lut); i++) { if (bandwidth <= bandwidth_lut[i].freq) { bandwidth = bandwidth_lut[i].val; break; } } if (i == ARRAY_SIZE(bandwidth_lut)) { ret = -EINVAL; goto err; } dev->bandwidth->val = bandwidth_lut[i].freq; dev_dbg(&spi->dev, "bandwidth selected=%d\n", bandwidth_lut[i].freq); /* * Fractional-N synthesizer * * +---------------------------------------+ * v | * Fref +----+ +-------+ +----+ +------+ +---+ * ------> | PD | --> | VCO | ------> | /4 | --> | /N.F | <-- | K | * +----+ +-------+ +----+ +------+ +---+ * | * | * v * +-------+ Fout * | /Rout | ------> * +-------+ */ /* Calculate PLL integer and fractional control word. */ f_vco = (u64) (f_rf + f_if + f_if1) * div_lo; div_n = div_u64_rem(f_vco, DIV_PRE_N * F_REF, &k); k_thresh = (DIV_PRE_N * F_REF) / F_VCO_STEP; k_frac = div_u64((u64) k * k_thresh, (DIV_PRE_N * F_REF)); /* Find out greatest common divisor and divide to smaller. */ uitmp = gcd(k_thresh, k_frac); k_thresh /= uitmp; k_frac /= uitmp; /* Force divide to reg max. Resolution will be reduced. */ uitmp = DIV_ROUND_UP(k_thresh, 4095); k_thresh = DIV_ROUND_CLOSEST(k_thresh, uitmp); k_frac = DIV_ROUND_CLOSEST(k_frac, uitmp); /* Calculate real RF set. 
*/ uitmp = (unsigned int) F_REF * DIV_PRE_N * div_n; uitmp += (unsigned int) F_REF * DIV_PRE_N * k_frac / k_thresh; uitmp /= div_lo; dev_dbg(&spi->dev, "f_rf=%u:%u f_vco=%llu div_n=%u k_thresh=%u k_frac=%u div_lo=%u\n", f_rf, uitmp, f_vco, div_n, k_thresh, k_frac, div_lo); ret = msi001_wreg(dev, 0x00000e); if (ret) goto err; ret = msi001_wreg(dev, 0x000003); if (ret) goto err; reg = 0 << 0; reg |= mode << 4; reg |= filter_mode << 12; reg |= bandwidth << 14; reg |= 0x02 << 17; reg |= 0x00 << 20; ret = msi001_wreg(dev, reg); if (ret) goto err; reg = 5 << 0; reg |= k_thresh << 4; reg |= 1 << 19; reg |= 1 << 21; ret = msi001_wreg(dev, reg); if (ret) goto err; reg = 2 << 0; reg |= k_frac << 4; reg |= div_n << 16; ret = msi001_wreg(dev, reg); if (ret) goto err; ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->cur.val, dev->if_gain->cur.val); if (ret) goto err; reg = 6 << 0; reg |= 63 << 4; reg |= 4095 << 10; ret = msi001_wreg(dev, reg); if (ret) goto err; return 0; err: dev_dbg(&spi->dev, "failed %d\n", ret); return ret; } static int msi001_standby(struct v4l2_subdev *sd) { struct msi001_dev *dev = sd_to_msi001_dev(sd); return msi001_wreg(dev, 0x000000); } static int msi001_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *v) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "index=%d\n", v->index); strscpy(v->name, "Mirics MSi001", sizeof(v->name)); v->type = V4L2_TUNER_RF; v->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = 49000000; v->rangehigh = 960000000; return 0; } static int msi001_s_tuner(struct v4l2_subdev *sd, const struct v4l2_tuner *v) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "index=%d\n", v->index); return 0; } static int msi001_g_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "tuner=%d\n", f->tuner); f->frequency = dev->f_tuner; return 0; } static int msi001_s_frequency(struct v4l2_subdev *sd, const struct v4l2_frequency *f) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; unsigned int band; dev_dbg(&spi->dev, "tuner=%d type=%d frequency=%u\n", f->tuner, f->type, f->frequency); if (f->frequency < ((bands[0].rangehigh + bands[1].rangelow) / 2)) band = 0; else band = 1; dev->f_tuner = clamp_t(unsigned int, f->frequency, bands[band].rangelow, bands[band].rangehigh); return msi001_set_tuner(dev); } static int msi001_enum_freq_bands(struct v4l2_subdev *sd, struct v4l2_frequency_band *band) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "tuner=%d type=%d index=%d\n", band->tuner, band->type, band->index); if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; band->capability = bands[band->index].capability; band->rangelow = bands[band->index].rangelow; band->rangehigh = bands[band->index].rangehigh; return 0; } static const struct v4l2_subdev_tuner_ops msi001_tuner_ops = { .standby = msi001_standby, .g_tuner = msi001_g_tuner, .s_tuner = msi001_s_tuner, .g_frequency = msi001_g_frequency, .s_frequency = msi001_s_frequency, .enum_freq_bands = msi001_enum_freq_bands, }; static const struct v4l2_subdev_ops msi001_ops = { .tuner = &msi001_tuner_ops, }; static int msi001_s_ctrl(struct v4l2_ctrl *ctrl) { struct msi001_dev *dev = container_of(ctrl->handler, struct msi001_dev, hdl); struct spi_device *spi = dev->spi; int ret; 
dev_dbg(&spi->dev, "id=%d name=%s val=%d min=%lld max=%lld step=%lld\n", ctrl->id, ctrl->name, ctrl->val, ctrl->minimum, ctrl->maximum, ctrl->step); switch (ctrl->id) { case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH: ret = msi001_set_tuner(dev); break; case V4L2_CID_RF_TUNER_LNA_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->val, dev->mixer_gain->cur.val, dev->if_gain->cur.val); break; case V4L2_CID_RF_TUNER_MIXER_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->val, dev->if_gain->cur.val); break; case V4L2_CID_RF_TUNER_IF_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->cur.val, dev->if_gain->val); break; default: dev_dbg(&spi->dev, "unknown control %d\n", ctrl->id); ret = -EINVAL; } return ret; } static const struct v4l2_ctrl_ops msi001_ctrl_ops = { .s_ctrl = msi001_s_ctrl, }; static int msi001_probe(struct spi_device *spi) { struct msi001_dev *dev; int ret; dev_dbg(&spi->dev, "\n"); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err; } dev->spi = spi; dev->f_tuner = bands[0].rangelow; v4l2_spi_subdev_init(&dev->sd, spi, &msi001_ops); /* Register controls */ v4l2_ctrl_handler_init(&dev->hdl, 5); dev->bandwidth_auto = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 1, 1); dev->bandwidth = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH, 200000, 8000000, 1, 200000); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(&spi->dev, "Could not initialize controls\n"); /* control init failed, free handler */ goto err_ctrl_handler_free; } v4l2_ctrl_auto_cluster(2, &dev->bandwidth_auto, 0, false); dev->lna_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 1, 1, 1); dev->mixer_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_MIXER_GAIN, 0, 1, 1, 1); dev->if_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_IF_GAIN, 0, 59, 1, 0); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(&spi->dev, "Could not initialize controls\n"); /* control init failed, free handler */ goto err_ctrl_handler_free; } dev->sd.ctrl_handler = &dev->hdl; return 0; err_ctrl_handler_free: v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); err: return ret; } static void msi001_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); struct msi001_dev *dev = sd_to_msi001_dev(sd); dev_dbg(&spi->dev, "\n"); /* * Registered by v4l2_spi_new_subdev() from master driver, but we must * unregister it from here. Weird. */ v4l2_device_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); } static const struct spi_device_id msi001_id_table[] = { {"msi001", 0}, {} }; MODULE_DEVICE_TABLE(spi, msi001_id_table); static struct spi_driver msi001_driver = { .driver = { .name = "msi001", .suppress_bind_attrs = true, }, .probe = msi001_probe, .remove = msi001_remove, .id_table = msi001_id_table, }; module_spi_driver(msi001_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Mirics MSi001"); MODULE_LICENSE("GPL");
// SPDX-License-Identifier: GPL-2.0 /* * Wifi Band Exclusion Interface for WLAN * Copyright (C) 2023 Advanced Micro Devices * */ #include <linux/acpi_amd_wbrf.h> #include <linux/units.h> #include <net/cfg80211.h> #include "ieee80211_i.h" void ieee80211_check_wbrf_support(struct ieee80211_local *local) { struct wiphy *wiphy = local->hw.wiphy; struct device *dev; if (!wiphy) return; dev = wiphy->dev.parent; if (!dev) return; local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev); } static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end) { bandwidth *= KHZ_PER_MHZ; center_freq *= KHZ_PER_MHZ; *start = center_freq - bandwidth / 2; *end = center_freq + bandwidth / 2; /* Frequency in Hz is expected */ *start = *start * HZ_PER_KHZ; *end = *end * HZ_PER_KHZ; } static void get_ranges_from_chandef(struct cfg80211_chan_def *chandef, struct wbrf_ranges_in_out *ranges_in) { u64 start_freq1, end_freq1; u64 start_freq2, end_freq2; int bandwidth; bandwidth = nl80211_chan_width_to_mhz(chandef->width); get_chan_freq_boundary(chandef->center_freq1, bandwidth, &start_freq1, &end_freq1); ranges_in->band_list[0].start = start_freq1; ranges_in->band_list[0].end = end_freq1; ranges_in->num_of_ranges = 1; if (chandef->width == NL80211_CHAN_WIDTH_80P80) { get_chan_freq_boundary(chandef->center_freq2, bandwidth, &start_freq2, &end_freq2); ranges_in->band_list[1].start = start_freq2; ranges_in->band_list[1].end = end_freq2; ranges_in->num_of_ranges++; } } void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef) { struct wbrf_ranges_in_out ranges_in = {0}; struct device *dev; if (!local->wbrf_supported) return; dev = local->hw.wiphy->dev.parent; get_ranges_from_chandef(chandef, &ranges_in); acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_ADD, &ranges_in); } void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef) { struct wbrf_ranges_in_out ranges_in = {0}; struct device *dev; if (!local->wbrf_supported) return; dev = local->hw.wiphy->dev.parent; get_ranges_from_chandef(chandef, &ranges_in); acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_REMOVE, &ranges_in); }
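The unit handling in get_chan_freq_boundary() above (center and bandwidth in MHz in, band edges in Hz out) is the part worth double-checking, so here is a hedged standalone restatement of the same arithmetic with one worked value; the 2437 MHz / 20 MHz inputs are an arbitrary example corresponding to 2.4 GHz Wi-Fi channel 6.

#include <stdio.h>
#include <stdint.h>

#define KHZ_PER_MHZ 1000ULL
#define HZ_PER_KHZ  1000ULL

/* Same arithmetic as the driver: MHz in, Hz out, centered on the channel. */
static void chan_freq_boundary(uint64_t center_mhz, uint64_t bw_mhz,
			       uint64_t *start, uint64_t *end)
{
	uint64_t center_khz = center_mhz * KHZ_PER_MHZ;
	uint64_t bw_khz = bw_mhz * KHZ_PER_MHZ;

	*start = (center_khz - bw_khz / 2) * HZ_PER_KHZ;
	*end = (center_khz + bw_khz / 2) * HZ_PER_KHZ;
}

int main(void)
{
	uint64_t start, end;

	chan_freq_boundary(2437, 20, &start, &end);
	/* Expected output: 2427000000..2447000000 Hz */
	printf("%llu..%llu Hz\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}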
// SPDX-License-Identifier: GPL-2.0
/*
 * Implementation of the diskquota system for the LINUX operating system. QUOTA
 * is implemented using the BSD system call interface as the means of
 * communication with the user level. This file contains the generic routines
 * called by the different filesystems on allocation of an inode or block.
 * These routines take care of the administration needed to have a consistent
 * diskquota tracking system. The ideas of both user and group quotas are based
 * on the Melbourne quota system as used on BSD derived systems. The internal
 * implementation is based on one of the several variants of the LINUX
 * inode-subsystem with added complexity of the diskquota system.
 *
 * Author: Marco van Wieringen <mvw@planets.elm.net>
 *
 * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96
 *
 * Revised list management to avoid races
 * -- Bill Hawes, <whawes@star.net>, 9/98
 *
 * Fixed races in dquot_transfer(), dqget() and dquot_alloc_...().
 * As the consequence the locking was moved from dquot_decr_...(),
 * dquot_incr_...() to calling functions.
 * invalidate_dquots() now writes modified dquots.
 * Serialized quota_off() and quota_on() for mount point.
 * Fixed a few bugs in grow_dquots().
 * Fixed deadlock in write_dquot() - we no longer account quotas on
 * quota files
 * remove_dquot_ref() moved to inode.c - it now traverses through inodes
 * add_dquot_ref() restarts after blocking
 * Added check for bogus uid and fixed check for group in quotactl.
 * Jan Kara, <jack@suse.cz>, sponsored by SuSE CR, 10-11/99
 *
 * Used struct list_head instead of own list struct
 * Invalidation of referenced dquots is no longer possible
 * Improved free_dquots list management
 * Quota and i_blocks are now updated in one place to avoid races
 * Warnings are now delayed so we won't block in critical section
 * Write updated not to require dquot lock
 * Jan Kara, <jack@suse.cz>, 9/2000
 *
 * Added dynamic quota structure allocation
 * Jan Kara <jack@suse.cz> 12/2000
 *
 * Rewritten quota interface. Implemented new quota format and
 * formats registering.
 * Jan Kara, <jack@suse.cz>, 2001,2002
 *
 * New SMP locking.
 * Jan Kara, <jack@suse.cz>, 10/2002
 *
 * Added journalled quota support, fix lock inversion problems
 * Jan Kara, <jack@suse.cz>, 2003,2004
 *
 * (C) Copyright 1994 - 1997 Marco van Wieringen
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/tty.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/kmod.h>
#include <linux/namei.h>
#include <linux/capability.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>

/*
 * There are five quota SMP locks:
 * * dq_list_lock protects all lists with quotas and quota formats.
 * * dquot->dq_dqb_lock protects data from dq_dqb
 * * inode->i_lock protects inode->i_blocks, i_bytes and also guards
 *   consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that
 *   dquot_transfer() can stabilize the amount it transfers
 * * dq_data_lock protects mem_dqinfo structures and modifications of dquot
 *   pointers in the inode
 * * dq_state_lock protects modifications of quota state (on quotaon and
 *   quotaoff) and readers who care about latest values take it as well.
 *
 * The spinlock ordering is hence:
 *   dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock,
 *   dq_list_lock > dq_state_lock
 *
 * Note that some things (e.g. sb pointer, type, id) don't change during
 * the life of the dquot structure and so needn't be protected by a lock
 *
 * Operations accessing dquots via inode pointers are protected by dquot_srcu.
 * Reading a dquot pointer needs srcu_read_lock(&dquot_srcu), and
 * synchronize_srcu(&dquot_srcu) is called after clearing pointers from
 * inode and before dropping dquot references to avoid use of dquots after
 * they are freed. dq_data_lock is used to serialize the pointer setting and
 * clearing operations.
 * Special care needs to be taken about S_NOQUOTA inode flag (marking that
 * inode is a quota file). Functions adding pointers from inode to dquots have
 * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set)
 * they have to do all pointer modifications before dropping dq_data_lock.
 * This makes sure they cannot race with quotaon which first sets S_NOQUOTA
 * flag and then drops all pointers to dquots from an inode.
 *
 * Each dquot has its dq_lock mutex. Dquot is locked when it is being read to
 * memory (or space for it is being allocated) on the first dqget(), when it is
 * being written out, and when it is being released on the last dqput(). The
 * allocation and release operations are serialized by the dq_lock and by
 * checking the use count in dquot_release().
 *
 * Lock ordering (including related VFS locks) is the following:
 *   s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
DEFINE_STATIC_SRCU(dquot_srcu);

static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq);
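/*
 * The ordering rules above read naturally as nested critical sections.
 * A minimal standalone userspace sketch (not from this file) of the
 * dq_data_lock > dq_list_lock nesting, with pthread mutexes standing in
 * for the kernel spinlocks; all names here are purely illustrative.
 */
#include <pthread.h>

static pthread_mutex_t data_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Any path needing both locks must take data_lock first and list_lock
 * second. Taking them in the opposite order in another thread could
 * deadlock, which is exactly what the documented ordering forbids.
 */
static void update_pointers_and_lists(void)
{
	pthread_mutex_lock(&data_lock);		/* outer: dq_data_lock analogue */
	pthread_mutex_lock(&list_lock);		/* inner: dq_list_lock analogue */
	/* ... modify dquot pointers, then list membership ... */
	pthread_mutex_unlock(&list_lock);
	pthread_mutex_unlock(&data_lock);
}

int main(void)
{
	update_pointers_and_lists();
	return 0;
}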
void __quota_error(struct super_block *sb, const char *func,
		   const char *fmt, ...)
{
	if (printk_ratelimit()) {
		va_list args;
		struct va_format vaf;

		va_start(args, fmt);

		vaf.fmt = fmt;
		vaf.va = &args;

		printk(KERN_ERR "Quota error (device %s): %s: %pV\n",
		       sb->s_id, func, &vaf);

		va_end(args);
	}
}
EXPORT_SYMBOL(__quota_error);

#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
static char *quotatypes[] = INITQFNAMES;
#endif
static struct quota_format_type *quota_formats;	/* List of registered formats */
static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;

/* SLAB cache for dquot structures */
static struct kmem_cache *dquot_cachep;

void register_quota_format(struct quota_format_type *fmt)
{
	spin_lock(&dq_list_lock);
	fmt->qf_next = quota_formats;
	quota_formats = fmt;
	spin_unlock(&dq_list_lock);
}
EXPORT_SYMBOL(register_quota_format);

void unregister_quota_format(struct quota_format_type *fmt)
{
	struct quota_format_type **actqf;

	spin_lock(&dq_list_lock);
	for (actqf = &quota_formats; *actqf && *actqf != fmt;
	     actqf = &(*actqf)->qf_next)
		;
	if (*actqf)
		*actqf = (*actqf)->qf_next;
	spin_unlock(&dq_list_lock);
}
EXPORT_SYMBOL(unregister_quota_format);

static struct quota_format_type *find_quota_format(int id)
{
	struct quota_format_type *actqf;

	spin_lock(&dq_list_lock);
	for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id;
	     actqf = actqf->qf_next)
		;
	if (!actqf || !try_module_get(actqf->qf_owner)) {
		int qm;

		spin_unlock(&dq_list_lock);

		for (qm = 0; module_names[qm].qm_fmt_id &&
			     module_names[qm].qm_fmt_id != id; qm++)
			;
		if (!module_names[qm].qm_fmt_id ||
		    request_module(module_names[qm].qm_mod_name))
			return NULL;

		spin_lock(&dq_list_lock);
		for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id;
		     actqf = actqf->qf_next)
			;
		if (actqf && !try_module_get(actqf->qf_owner))
			actqf = NULL;
	}
	spin_unlock(&dq_list_lock);
	return actqf;
}

static void put_quota_format(struct quota_format_type *fmt)
{
	module_put(fmt->qf_owner);
}

/*
 * Dquot List Management:
 * The quota code uses five lists for dquot management: the inuse_list,
 * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array.
 * A single dquot structure may be on some of those lists, depending on
 * its current state.
 *
 * All dquots are placed to the end of inuse_list when first created, and this
 * list is used for invalidate operation, which must look at every dquot.
 *
 * When the last reference of a dquot is dropped, the dquot is added to
 * releasing_dquots. We'll then queue work item which will call
 * synchronize_srcu() and after that perform the final cleanup of all the
 * dquots on the list. Each cleaned up dquot is moved to free_dquots list.
 * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot
 * struct.
 *
 * Unused and cleaned up dquots are in the free_dquots list and this list is
 * searched whenever we need an available dquot. Dquots are removed from the
 * list as soon as they are used again and dqstats.free_dquots gives the number
 * of dquots on the list. When dquot is invalidated it's completely released
 * from memory.
 *
 * Dirty dquots are added to the dqi_dirty_list of quota_info when marked
 * dirty, and this list is searched when writing dirty dquots back to the
 * quota file. Note that some filesystems do dirty dquot tracking on their
 * own (e.g. in a journal) and thus don't use dqi_dirty_list.
 *
 * Dquots with a specific identity (device, type and id) are placed on
 * one of the dquot_hash[] hash chains. This provides an efficient search
 * mechanism to locate a specific dquot.
 */
static LIST_HEAD(inuse_list);
static LIST_HEAD(free_dquots);
static LIST_HEAD(releasing_dquots);
static unsigned int dq_hash_bits, dq_hash_mask;
static struct hlist_head *dquot_hash;

struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);

static qsize_t inode_get_rsv_space(struct inode *inode);
static qsize_t __inode_get_rsv_space(struct inode *inode);
static int __dquot_initialize(struct inode *inode, int type);

static void quota_release_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn);

static inline unsigned int
hashfn(const struct super_block *sb, struct kqid qid)
{
	unsigned int id = from_kqid(&init_user_ns, qid);
	int type = qid.type;
	unsigned long tmp;

	tmp = (((unsigned long)sb >> L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
	return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask;
}

/*
 * Following list functions expect dq_list_lock to be held
 */
static inline void insert_dquot_hash(struct dquot *dquot)
{
	struct hlist_head *head;

	head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id);
	hlist_add_head(&dquot->dq_hash, head);
}

static inline void remove_dquot_hash(struct dquot *dquot)
{
	hlist_del_init(&dquot->dq_hash);
}

static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
				struct kqid qid)
{
	struct dquot *dquot;

	hlist_for_each_entry(dquot, dquot_hash + hashent, dq_hash)
		if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid))
			return dquot;

	return NULL;
}

/* Add a dquot to the tail of the free list */
static inline void put_dquot_last(struct dquot *dquot)
{
	list_add_tail(&dquot->dq_free, &free_dquots);
	dqstats_inc(DQST_FREE_DQUOTS);
}

static inline void put_releasing_dquots(struct dquot *dquot)
{
	list_add_tail(&dquot->dq_free, &releasing_dquots);
	set_bit(DQ_RELEASING_B, &dquot->dq_flags);
}

static inline void remove_free_dquot(struct dquot *dquot)
{
	if (list_empty(&dquot->dq_free))
		return;
	list_del_init(&dquot->dq_free);
	if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags))
		dqstats_dec(DQST_FREE_DQUOTS);
	else
		clear_bit(DQ_RELEASING_B, &dquot->dq_flags);
}

static inline void put_inuse(struct dquot *dquot)
{
	/* We add to the back of inuse list so we don't have to restart
	 * when traversing this list and we block */
	list_add_tail(&dquot->dq_inuse, &inuse_list);
	dqstats_inc(DQST_ALLOC_DQUOTS);
}

static inline void remove_inuse(struct dquot *dquot)
{
	dqstats_dec(DQST_ALLOC_DQUOTS);
	list_del(&dquot->dq_inuse);
}

/*
 * End of list functions needing dq_list_lock
 */

static void wait_on_dquot(struct dquot *dquot)
{
	mutex_lock(&dquot->dq_lock);
	mutex_unlock(&dquot->dq_lock);
}

static inline int dquot_active(struct dquot *dquot)
{
	return test_bit(DQ_ACTIVE_B, &dquot->dq_flags);
}

static inline int dquot_dirty(struct dquot *dquot)
{
	return test_bit(DQ_MOD_B, &dquot->dq_flags);
}

static inline int mark_dquot_dirty(struct dquot *dquot)
{
	return dquot->dq_sb->dq_op->mark_dirty(dquot);
}

/* Mark dquot dirty in atomic manner, and return its old dirty flag state */
int dquot_mark_dquot_dirty(struct dquot *dquot)
{
	int ret = 1;

	if (!dquot_active(dquot))
		return 0;

	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
		return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags);

	/* If quota is dirty already, we don't have to acquire dq_list_lock */
	if (dquot_dirty(dquot))
		return 1;

	spin_lock(&dq_list_lock);
	if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
		list_add(&dquot->dq_dirty,
			 &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_dirty_list);
		ret = 0;
	}
	spin_unlock(&dq_list_lock);
	return ret;
}
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
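/*
 * hashfn() folds the superblock pointer, the id, and the quota type into
 * one bucket index. Below is a standalone sketch of the same mixing (not
 * from this file); the constants are assumed for illustration, whereas the
 * kernel derives dq_hash_bits/dq_hash_mask from the table size at init
 * and L1_CACHE_SHIFT from the architecture.
 */
#include <stdint.h>
#include <stdio.h>

#define L1_CACHE_SHIFT	6	/* assumed; arch-dependent in the kernel */
#define MAXQUOTAS	3
#define HASH_BITS	8	/* assumed table of 256 buckets */
#define HASH_MASK	((1u << HASH_BITS) - 1)

static unsigned int bucket_of(const void *sb, unsigned int id, int type)
{
	unsigned long tmp;

	/* Same mixing as hashfn(): fold pointer and id, scale by type */
	tmp = (((unsigned long)sb >> L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
	return (tmp + (tmp >> HASH_BITS)) & HASH_MASK;
}

int main(void)
{
	int dummy_sb;

	/* e.g. uid 1000 user quota on some superblock address */
	printf("bucket = %u\n", bucket_of(&dummy_sb, 1000, 0));
	return 0;
}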
/* Dirtify all the dquots - this can block when journalling */
static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots)
{
	int ret, err, cnt;
	struct dquot *dquot;

	ret = err = 0;
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
		if (dquot)
			/* Even in case of error we have to continue */
			ret = mark_dquot_dirty(dquot);
		if (!err && ret < 0)
			err = ret;
	}
	return err;
}

static inline void dqput_all(struct dquot **dquot)
{
	unsigned int cnt;

	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		dqput(dquot[cnt]);
}

static inline int clear_dquot_dirty(struct dquot *dquot)
{
	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
		return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags);

	spin_lock(&dq_list_lock);
	if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) {
		spin_unlock(&dq_list_lock);
		return 0;
	}
	list_del_init(&dquot->dq_dirty);
	spin_unlock(&dq_list_lock);
	return 1;
}

void mark_info_dirty(struct super_block *sb, int type)
{
	spin_lock(&dq_data_lock);
	sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY;
	spin_unlock(&dq_data_lock);
}
EXPORT_SYMBOL(mark_info_dirty);

/*
 * Read dquot from disk and alloc space for it
 */
int dquot_acquire(struct dquot *dquot)
{
	int ret = 0, ret2 = 0;
	unsigned int memalloc;
	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);

	mutex_lock(&dquot->dq_lock);
	memalloc = memalloc_nofs_save();
	if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
		ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
		if (ret < 0)
			goto out_iolock;
	}
	/* Make sure flags update is visible after dquot has been filled */
	smp_mb__before_atomic();
	set_bit(DQ_READ_B, &dquot->dq_flags);
	/* Instantiate dquot if needed */
	if (!dquot_active(dquot) && !dquot->dq_off) {
		ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
		/* Write the info if needed */
		if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
			ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info(
					dquot->dq_sb, dquot->dq_id.type);
		}
		if (ret < 0)
			goto out_iolock;
		if (ret2 < 0) {
			ret = ret2;
			goto out_iolock;
		}
	}
	/*
	 * Make sure flags update is visible after on-disk struct has been
	 * allocated. Paired with smp_rmb() in dqget().
	 */
	smp_mb__before_atomic();
	set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_iolock:
	memalloc_nofs_restore(memalloc);
	mutex_unlock(&dquot->dq_lock);
	return ret;
}
EXPORT_SYMBOL(dquot_acquire);

/*
 * Write dquot to disk
 */
int dquot_commit(struct dquot *dquot)
{
	int ret = 0;
	unsigned int memalloc;
	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);

	mutex_lock(&dquot->dq_lock);
	memalloc = memalloc_nofs_save();
	if (!clear_dquot_dirty(dquot))
		goto out_lock;
	/* An inactive dquot can exist only if there was an error during
	 * read/init, so we'd better not write it */
	if (dquot_active(dquot))
		ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
	else
		ret = -EIO;
out_lock:
	memalloc_nofs_restore(memalloc);
	mutex_unlock(&dquot->dq_lock);
	return ret;
}
EXPORT_SYMBOL(dquot_commit);

/*
 * Release dquot
 */
int dquot_release(struct dquot *dquot)
{
	int ret = 0, ret2 = 0;
	unsigned int memalloc;
	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);

	mutex_lock(&dquot->dq_lock);
	memalloc = memalloc_nofs_save();
	/* Check whether we are not racing with some other dqget() */
	if (dquot_is_busy(dquot))
		goto out_dqlock;
	if (dqopt->ops[dquot->dq_id.type]->release_dqblk) {
		ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot);
		/* Write the info */
		if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
			ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info(
						dquot->dq_sb, dquot->dq_id.type);
		}
		if (ret >= 0)
			ret = ret2;
	}
	clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_dqlock:
	memalloc_nofs_restore(memalloc);
	mutex_unlock(&dquot->dq_lock);
	return ret;
}
EXPORT_SYMBOL(dquot_release);

void dquot_destroy(struct dquot *dquot)
{
	kmem_cache_free(dquot_cachep, dquot);
}
EXPORT_SYMBOL(dquot_destroy);

static inline void do_destroy_dquot(struct dquot *dquot)
{
	dquot->dq_sb->dq_op->destroy_dquot(dquot);
}
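/*
 * The smp_mb__before_atomic()/smp_rmb() pairing above is the usual
 * publish/consume pattern: fill the structure, then set the flag; the
 * reader checks the flag and only then trusts the payload. A minimal
 * standalone userspace analogue with C11 atomics (not from this file,
 * names illustrative only):
 */
#include <stdatomic.h>
#include <stdbool.h>

struct payload {
	int data;
	atomic_bool active;	/* stands in for DQ_ACTIVE_B */
};

/* Writer: publish data before setting the flag (cf. dquot_acquire()) */
static void publish(struct payload *p, int value)
{
	p->data = value;
	atomic_store_explicit(&p->active, true, memory_order_release);
}

/* Reader: observe the flag, then the data (cf. smp_rmb() in dqget()) */
static bool consume(struct payload *p, int *out)
{
	if (!atomic_load_explicit(&p->active, memory_order_acquire))
		return false;
	*out = p->data;	/* guaranteed to see the writer's value */
	return true;
}

int main(void)
{
	struct payload p = { .data = 0, .active = false };
	int v = 0;

	publish(&p, 42);
	return (consume(&p, &v) && v == 42) ? 0 : 1;
}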
/*
 * Invalidate all dquots on the list. Note that this function is called after
 * quota is disabled and pointers from inodes removed so there cannot be new
 * quota users. There can still be some users of quotas due to inodes being
 * just deleted or pruned by prune_icache() (those are not attached to any
 * list) or parallel quotactl call. We have to wait for such users.
 */
static void invalidate_dquots(struct super_block *sb, int type)
{
	struct dquot *dquot, *tmp;

restart:
	flush_delayed_work(&quota_release_work);

	spin_lock(&dq_list_lock);
	list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
		if (dquot->dq_sb != sb)
			continue;
		if (dquot->dq_id.type != type)
			continue;
		/* Wait for dquot users */
		if (atomic_read(&dquot->dq_count)) {
			atomic_inc(&dquot->dq_count);
			spin_unlock(&dq_list_lock);
			/*
			 * Once dqput() wakes us up, we know it's time to free
			 * the dquot.
			 * IMPORTANT: we rely on the fact that there is always
			 * at most one process waiting for dquot to free.
			 * Otherwise dq_count would be > 1 and we would never
			 * wake up.
			 */
			wait_event(dquot_ref_wq,
				   atomic_read(&dquot->dq_count) == 1);
			dqput(dquot);
			/* At this moment the dquot need not exist (it could
			 * have been reclaimed by prune_dqcache()). Hence we
			 * must restart. */
			goto restart;
		}
		/*
		 * The last user already dropped its reference but the dquot
		 * didn't get fully cleaned up yet. Restart the scan which
		 * flushes the work cleaning up released dquots.
		 */
		if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
			spin_unlock(&dq_list_lock);
			goto restart;
		}
		/*
		 * Quota now has no users and it has been written on last
		 * dqput()
		 */
		remove_dquot_hash(dquot);
		remove_free_dquot(dquot);
		remove_inuse(dquot);
		do_destroy_dquot(dquot);
	}
	spin_unlock(&dq_list_lock);
}

/* Call callback for every active dquot on given filesystem */
int dquot_scan_active(struct super_block *sb,
		      int (*fn)(struct dquot *dquot, unsigned long priv),
		      unsigned long priv)
{
	struct dquot *dquot, *old_dquot = NULL;
	int ret = 0;

	WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&dq_list_lock);
	list_for_each_entry(dquot, &inuse_list, dq_inuse) {
		if (!dquot_active(dquot))
			continue;
		if (dquot->dq_sb != sb)
			continue;
		/* Now we have active dquot so we can just increase use count */
		atomic_inc(&dquot->dq_count);
		spin_unlock(&dq_list_lock);
		dqput(old_dquot);
		old_dquot = dquot;
		/*
		 * ->release_dquot() can be racing with us. Our reference
		 * protects us from new calls to it so just wait for any
		 * outstanding call and recheck the DQ_ACTIVE_B after that.
		 */
		wait_on_dquot(dquot);
		if (dquot_active(dquot)) {
			ret = fn(dquot, priv);
			if (ret < 0)
				goto out;
		}
		spin_lock(&dq_list_lock);
		/* We are safe to continue now because our dquot could not
		 * be moved out of the inuse list while we hold the reference */
	}
	spin_unlock(&dq_list_lock);
out:
	dqput(old_dquot);
	return ret;
}
EXPORT_SYMBOL(dquot_scan_active);

static inline int dquot_write_dquot(struct dquot *dquot)
{
	int ret = dquot->dq_sb->dq_op->write_dquot(dquot);

	if (ret < 0) {
		quota_error(dquot->dq_sb, "Can't write quota structure "
			    "(error %d). Quota may get out of sync!", ret);
		/* Clear dirty bit anyway to avoid infinite loop. */
		clear_dquot_dirty(dquot);
	}
	return ret;
}

/* Write all dquot structures to quota files */
int dquot_writeback_dquots(struct super_block *sb, int type)
{
	struct list_head dirty;
	struct dquot *dquot;
	struct quota_info *dqopt = sb_dqopt(sb);
	int cnt;
	int err, ret = 0;

	WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount));

	flush_delayed_work(&quota_release_work);

	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (type != -1 && cnt != type)
			continue;
		if (!sb_has_quota_active(sb, cnt))
			continue;
		spin_lock(&dq_list_lock);
		/* Move list away to avoid livelock. */
		list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty);
		while (!list_empty(&dirty)) {
			dquot = list_first_entry(&dirty, struct dquot,
						 dq_dirty);

			WARN_ON(!dquot_active(dquot));
			/* If the dquot is releasing we should not touch it */
			if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
				spin_unlock(&dq_list_lock);
				flush_delayed_work(&quota_release_work);
				spin_lock(&dq_list_lock);
				continue;
			}

			/* Now we have active dquot from which someone is
			 * holding reference so we can safely just increase
			 * use count */
			dqgrab(dquot);
			spin_unlock(&dq_list_lock);
			err = dquot_write_dquot(dquot);
			if (err && !ret)
				ret = err;
			dqput(dquot);
			spin_lock(&dq_list_lock);
		}
		spin_unlock(&dq_list_lock);
	}

	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		if ((cnt == type || type == -1) &&
		    sb_has_quota_active(sb, cnt) && info_dirty(&dqopt->info[cnt]))
			sb->dq_op->write_info(sb, cnt);
	dqstats_inc(DQST_SYNCS);

	return ret;
}
EXPORT_SYMBOL(dquot_writeback_dquots);

/* Write all dquot structures to disk and make them visible from userspace */
int dquot_quota_sync(struct super_block *sb, int type)
{
	struct quota_info *dqopt = sb_dqopt(sb);
	int cnt;
	int ret;

	ret = dquot_writeback_dquots(sb, type);
	if (ret)
		return ret;
	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
		return 0;

	/* This is not very clever (and fast) but currently I don't know about
	 * any other simple way of getting quota data to disk and we must get
	 * them there for userspace to be visible... */
	if (sb->s_op->sync_fs) {
		ret = sb->s_op->sync_fs(sb, 1);
		if (ret)
			return ret;
	}
	ret = sync_blockdev(sb->s_bdev);
	if (ret)
		return ret;

	/*
	 * Now when everything is written we can discard the pagecache so
	 * that userspace sees the changes.
	 */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (type != -1 && cnt != type)
			continue;
		if (!sb_has_quota_active(sb, cnt))
			continue;
		inode_lock(dqopt->files[cnt]);
		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
		inode_unlock(dqopt->files[cnt]);
	}

	return 0;
}
EXPORT_SYMBOL(dquot_quota_sync);

static unsigned long
dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	struct dquot *dquot;
	unsigned long freed = 0;

	spin_lock(&dq_list_lock);
	while (!list_empty(&free_dquots) && sc->nr_to_scan) {
		dquot = list_first_entry(&free_dquots, struct dquot, dq_free);
		remove_dquot_hash(dquot);
		remove_free_dquot(dquot);
		remove_inuse(dquot);
		do_destroy_dquot(dquot);
		sc->nr_to_scan--;
		freed++;
	}
	spin_unlock(&dq_list_lock);
	return freed;
}

static unsigned long
dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
	return vfs_pressure_ratio(
	percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]));
}

/*
 * Safely release dquot and put reference to dquot.
 */
static void quota_release_workfn(struct work_struct *work)
{
	struct dquot *dquot;
	struct list_head rls_head;

	spin_lock(&dq_list_lock);
	/* Exchange the list head to avoid livelock. */
	list_replace_init(&releasing_dquots, &rls_head);
	spin_unlock(&dq_list_lock);
	synchronize_srcu(&dquot_srcu);

restart:
	spin_lock(&dq_list_lock);
	while (!list_empty(&rls_head)) {
		dquot = list_first_entry(&rls_head, struct dquot, dq_free);
		WARN_ON_ONCE(atomic_read(&dquot->dq_count));
		/*
		 * Note that DQ_RELEASING_B protects us from racing with
		 * invalidate_dquots() calls so we are safe to work with the
		 * dquot even after we drop dq_list_lock.
		 */
		if (dquot_dirty(dquot)) {
			spin_unlock(&dq_list_lock);
			/* Commit dquot before releasing */
			dquot_write_dquot(dquot);
			goto restart;
		}
		if (dquot_active(dquot)) {
			spin_unlock(&dq_list_lock);
			dquot->dq_sb->dq_op->release_dquot(dquot);
			goto restart;
		}
		/* Dquot is inactive and clean, now move it to free list */
		remove_free_dquot(dquot);
		put_dquot_last(dquot);
	}
	spin_unlock(&dq_list_lock);
}

/*
 * Put reference to dquot
 */
void dqput(struct dquot *dquot)
{
	if (!dquot)
		return;
#ifdef CONFIG_QUOTA_DEBUG
	if (!atomic_read(&dquot->dq_count)) {
		quota_error(dquot->dq_sb, "trying to free free dquot of %s %d",
			    quotatypes[dquot->dq_id.type],
			    from_kqid(&init_user_ns, dquot->dq_id));
		BUG();
	}
#endif
	dqstats_inc(DQST_DROPS);

	spin_lock(&dq_list_lock);
	if (atomic_read(&dquot->dq_count) > 1) {
		/* We have more than one user... nothing to do */
		atomic_dec(&dquot->dq_count);
		/* Releasing dquot during quotaoff phase? */
		if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) &&
		    atomic_read(&dquot->dq_count) == 1)
			wake_up(&dquot_ref_wq);
		spin_unlock(&dq_list_lock);
		return;
	}

	/* Need to release dquot? */
	WARN_ON_ONCE(!list_empty(&dquot->dq_free));
	put_releasing_dquots(dquot);
	atomic_dec(&dquot->dq_count);
	spin_unlock(&dq_list_lock);
	queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
}
EXPORT_SYMBOL(dqput);

struct dquot *dquot_alloc(struct super_block *sb, int type)
{
	return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
}
EXPORT_SYMBOL(dquot_alloc);

static struct dquot *get_empty_dquot(struct super_block *sb, int type)
{
	struct dquot *dquot;

	dquot = sb->dq_op->alloc_dquot(sb, type);
	if (!dquot)
		return NULL;

	mutex_init(&dquot->dq_lock);
	INIT_LIST_HEAD(&dquot->dq_free);
	INIT_LIST_HEAD(&dquot->dq_inuse);
	INIT_HLIST_NODE(&dquot->dq_hash);
	INIT_LIST_HEAD(&dquot->dq_dirty);
	dquot->dq_sb = sb;
	dquot->dq_id = make_kqid_invalid(type);
	atomic_set(&dquot->dq_count, 1);
	spin_lock_init(&dquot->dq_dqb_lock);

	return dquot;
}

/*
 * Get reference to dquot
 *
 * Locking is slightly tricky here. We are guarded from parallel quotaoff()
 * destroying our dquot by:
 *   a) checking for quota flags under dq_list_lock and
 *   b) getting a reference to dquot before we release dq_list_lock
 */
struct dquot *dqget(struct super_block *sb, struct kqid qid)
{
	unsigned int hashent = hashfn(sb, qid);
	struct dquot *dquot, *empty = NULL;

	if (!qid_has_mapping(sb->s_user_ns, qid))
		return ERR_PTR(-EINVAL);

	if (!sb_has_quota_active(sb, qid.type))
		return ERR_PTR(-ESRCH);
we_slept:
	spin_lock(&dq_list_lock);
	spin_lock(&dq_state_lock);
	if (!sb_has_quota_active(sb, qid.type)) {
		spin_unlock(&dq_state_lock);
		spin_unlock(&dq_list_lock);
		dquot = ERR_PTR(-ESRCH);
		goto out;
	}
	spin_unlock(&dq_state_lock);

	dquot = find_dquot(hashent, sb, qid);
	if (!dquot) {
		if (!empty) {
			spin_unlock(&dq_list_lock);
			empty = get_empty_dquot(sb, qid.type);
			if (!empty)
				schedule();	/* Try to wait for a moment... */
			goto we_slept;
		}
		dquot = empty;
		empty = NULL;
		dquot->dq_id = qid;
		/* all dquots go on the inuse_list */
		put_inuse(dquot);
		/* hash it first so it can be found */
		insert_dquot_hash(dquot);
		spin_unlock(&dq_list_lock);
		dqstats_inc(DQST_LOOKUPS);
	} else {
		if (!atomic_read(&dquot->dq_count))
			remove_free_dquot(dquot);
		atomic_inc(&dquot->dq_count);
		spin_unlock(&dq_list_lock);
		dqstats_inc(DQST_CACHE_HITS);
		dqstats_inc(DQST_LOOKUPS);
	}
	/* Wait for dq_lock - after this we know that either dquot_release() is
	 * already finished or it will be canceled due to dq_count > 0 test */
	wait_on_dquot(dquot);
	/* Read the dquot / allocate space in quota file */
	if (!dquot_active(dquot)) {
		int err;

		err = sb->dq_op->acquire_dquot(dquot);
		if (err < 0) {
			dqput(dquot);
			dquot = ERR_PTR(err);
			goto out;
		}
	}
	/*
	 * Make sure following reads see filled structure - paired with
	 * smp_mb__before_atomic() in dquot_acquire().
	 */
	smp_rmb();
	/* Has somebody invalidated entry under us? */
	WARN_ON_ONCE(hlist_unhashed(&dquot->dq_hash));
out:
	if (empty)
		do_destroy_dquot(empty);

	return dquot;
}
EXPORT_SYMBOL(dqget);

static inline struct dquot __rcu **i_dquot(struct inode *inode)
{
	return inode->i_sb->s_op->get_dquots(inode);
}

static int dqinit_needed(struct inode *inode, int type)
{
	struct dquot __rcu * const *dquots;
	int cnt;

	if (IS_NOQUOTA(inode))
		return 0;

	dquots = i_dquot(inode);
	if (type != -1)
		return !dquots[type];
	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		if (!dquots[cnt])
			return 1;
	return 0;
}

/* This routine is guarded by s_umount semaphore */
static int add_dquot_ref(struct super_block *sb, int type)
{
	struct inode *inode, *old_inode = NULL;
#ifdef CONFIG_QUOTA_DEBUG
	int reserved = 0;
#endif
	int err = 0;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    !atomic_read(&inode->i_writecount) ||
		    !dqinit_needed(inode, type)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&sb->s_inode_list_lock);

#ifdef CONFIG_QUOTA_DEBUG
		if (unlikely(inode_get_rsv_space(inode) > 0))
			reserved = 1;
#endif
		iput(old_inode);
		err = __dquot_initialize(inode, type);
		if (err) {
			iput(inode);
			goto out;
		}

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * s_inode_list_lock. We cannot iput the inode now as we can be
		 * holding the last reference and we cannot iput it under
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		old_inode = inode;
		cond_resched();
		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&sb->s_inode_list_lock);
	iput(old_inode);
out:
#ifdef CONFIG_QUOTA_DEBUG
	if (reserved) {
		quota_error(sb, "Writes happened before quota was turned on "
			"thus quota information is probably inconsistent. "
			"Please run quotacheck(8)");
	}
#endif
	return err;
}

static void remove_dquot_ref(struct super_block *sb, int type)
{
	struct inode *inode;
#ifdef CONFIG_QUOTA_DEBUG
	int reserved = 0;
#endif

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		/*
		 * We have to scan also I_NEW inodes because they can already
		 * have quota pointer initialized. Luckily, we need to touch
		 * only quota pointers and these have separate locking
		 * (dq_data_lock).
		 */
		spin_lock(&dq_data_lock);
		if (!IS_NOQUOTA(inode)) {
			struct dquot __rcu **dquots = i_dquot(inode);
			struct dquot *dquot = srcu_dereference_check(
				dquots[type], &dquot_srcu,
				lockdep_is_held(&dq_data_lock));

#ifdef CONFIG_QUOTA_DEBUG
			if (unlikely(inode_get_rsv_space(inode) > 0))
				reserved = 1;
#endif
			rcu_assign_pointer(dquots[type], NULL);
			if (dquot)
				dqput(dquot);
		}
		spin_unlock(&dq_data_lock);
	}
	spin_unlock(&sb->s_inode_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
	if (reserved) {
		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
			" was disabled thus quota information is probably "
			"inconsistent. Please run quotacheck(8).\n", sb->s_id);
	}
#endif
}

/* Gather all references from inodes and drop them */
static void drop_dquot_ref(struct super_block *sb, int type)
{
	if (sb->dq_op)
		remove_dquot_ref(sb, type);
}

static inline
void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
{
	if (dquot->dq_dqb.dqb_rsvspace >= number)
		dquot->dq_dqb.dqb_rsvspace -= number;
	else {
		WARN_ON_ONCE(1);
		dquot->dq_dqb.dqb_rsvspace = 0;
	}
	if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <=
	    dquot->dq_dqb.dqb_bsoftlimit)
		dquot->dq_dqb.dqb_btime = (time64_t) 0;
	clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}

static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
	    dquot->dq_dqb.dqb_curinodes >= number)
		dquot->dq_dqb.dqb_curinodes -= number;
	else
		dquot->dq_dqb.dqb_curinodes = 0;
	if (dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
		dquot->dq_dqb.dqb_itime = (time64_t) 0;
	clear_bit(DQ_INODES_B, &dquot->dq_flags);
}

static void dquot_decr_space(struct dquot *dquot, qsize_t number)
{
	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
	    dquot->dq_dqb.dqb_curspace >= number)
		dquot->dq_dqb.dqb_curspace -= number;
	else
		dquot->dq_dqb.dqb_curspace = 0;
	if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <=
	    dquot->dq_dqb.dqb_bsoftlimit)
		dquot->dq_dqb.dqb_btime = (time64_t) 0;
	clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}

struct dquot_warn {
	struct super_block *w_sb;
	struct kqid w_dq_id;
	short w_type;
};

static int warning_issued(struct dquot *dquot, const int warntype)
{
	int flag = (warntype == QUOTA_NL_BHARDWARN ||
		warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B :
		((warntype == QUOTA_NL_IHARDWARN ||
		warntype == QUOTA_NL_ISOFTLONGWARN) ?
		DQ_INODES_B : 0);

	if (!flag)
		return 0;
	return test_and_set_bit(flag, &dquot->dq_flags);
}

#ifdef CONFIG_PRINT_QUOTA_WARNING
static int flag_print_warnings = 1;

static int need_print_warning(struct dquot_warn *warn)
{
	if (!flag_print_warnings)
		return 0;

	switch (warn->w_dq_id.type) {
	case USRQUOTA:
		return uid_eq(current_fsuid(), warn->w_dq_id.uid);
	case GRPQUOTA:
		return in_group_p(warn->w_dq_id.gid);
	case PRJQUOTA:
		return 1;
	}
	return 0;
}

/* Print warning to user which exceeded quota */
static void print_warning(struct dquot_warn *warn)
{
	char *msg = NULL;
	struct tty_struct *tty;
	int warntype = warn->w_type;

	if (warntype == QUOTA_NL_IHARDBELOW ||
	    warntype == QUOTA_NL_ISOFTBELOW ||
	    warntype == QUOTA_NL_BHARDBELOW ||
	    warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn))
		return;

	tty = get_current_tty();
	if (!tty)
		return;
	tty_write_message(tty, warn->w_sb->s_id);
	if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN)
		tty_write_message(tty, ": warning, ");
	else
		tty_write_message(tty, ": write failed, ");
	tty_write_message(tty, quotatypes[warn->w_dq_id.type]);
	switch (warntype) {
	case QUOTA_NL_IHARDWARN:
		msg = " file limit reached.\r\n";
		break;
	case QUOTA_NL_ISOFTLONGWARN:
		msg = " file quota exceeded too long.\r\n";
		break;
	case QUOTA_NL_ISOFTWARN:
		msg = " file quota exceeded.\r\n";
		break;
	case QUOTA_NL_BHARDWARN:
		msg = " block limit reached.\r\n";
		break;
	case QUOTA_NL_BSOFTLONGWARN:
		msg = " block quota exceeded too long.\r\n";
		break;
	case QUOTA_NL_BSOFTWARN:
		msg = " block quota exceeded.\r\n";
		break;
	}
	tty_write_message(tty, msg);
	tty_kref_put(tty);
}
#endif

static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot,
			    int warntype)
{
	if (warning_issued(dquot, warntype))
		return;
	warn->w_type = warntype;
	warn->w_sb = dquot->dq_sb;
	warn->w_dq_id = dquot->dq_id;
}

/*
 * Write warnings to the console and send warning messages over netlink.
 *
 * Note that this function can call into tty and networking code.
 */
static void flush_warnings(struct dquot_warn *warn)
{
	int i;

	for (i = 0; i < MAXQUOTAS; i++) {
		if (warn[i].w_type == QUOTA_NL_NOWARN)
			continue;
#ifdef CONFIG_PRINT_QUOTA_WARNING
		print_warning(&warn[i]);
#endif
		quota_send_warning(warn[i].w_dq_id,
				   warn[i].w_sb->s_dev, warn[i].w_type);
	}
}

static int ignore_hardlimit(struct dquot *dquot)
{
	struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];

	return capable(CAP_SYS_RESOURCE) &&
	       (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
		!(info->dqi_flags & DQF_ROOT_SQUASH));
}

static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes,
			    struct dquot_warn *warn)
{
	qsize_t newinodes;
	int ret = 0;

	spin_lock(&dquot->dq_dqb_lock);
	newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) ||
	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
		goto add;

	if (dquot->dq_dqb.dqb_ihardlimit &&
	    newinodes > dquot->dq_dqb.dqb_ihardlimit &&
	    !ignore_hardlimit(dquot)) {
		prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN);
		ret = -EDQUOT;
		goto out;
	}

	if (dquot->dq_dqb.dqb_isoftlimit &&
	    newinodes > dquot->dq_dqb.dqb_isoftlimit &&
	    dquot->dq_dqb.dqb_itime &&
	    ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime &&
	    !ignore_hardlimit(dquot)) {
		prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN);
		ret = -EDQUOT;
		goto out;
	}

	if (dquot->dq_dqb.dqb_isoftlimit &&
	    newinodes > dquot->dq_dqb.dqb_isoftlimit &&
	    dquot->dq_dqb.dqb_itime == 0) {
		prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN);
		dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() +
		    sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace;
	}
add:
	dquot->dq_dqb.dqb_curinodes = newinodes;

out:
	spin_unlock(&dquot->dq_dqb_lock);
	return ret;
}
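/*
 * The three checks in dquot_add_inodes() implement the classic soft/hard
 * limit scheme: the hard limit always fails, the soft limit fails once the
 * grace period has expired, and a first soft-limit crossing only arms the
 * grace clock. A standalone sketch of the same decision (not from this
 * file; simplified types and hypothetical values):
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct limits {
	uint64_t cur, soft, hard;	/* current usage and limits */
	time_t grace_expiry;		/* 0 = grace clock not running */
};

/* Mirrors the check order above: hard, expired soft, first soft crossing */
static int check_add(struct limits *l, uint64_t more, time_t now,
		     time_t grace_period)
{
	uint64_t new = l->cur + more;

	if (l->hard && new > l->hard)
		return -1;		/* over hard limit: refuse */
	if (l->soft && new > l->soft && l->grace_expiry &&
	    now >= l->grace_expiry)
		return -1;		/* soft limit and grace expired */
	if (l->soft && new > l->soft && !l->grace_expiry)
		l->grace_expiry = now + grace_period;	/* warn, arm grace */
	l->cur = new;
	return 0;
}

int main(void)
{
	struct limits l = { .cur = 95, .soft = 100, .hard = 120 };
	time_t now = time(NULL);

	printf("%d\n", check_add(&l, 10, now, 7 * 24 * 3600)); /* 0: grace armed */
	printf("%d\n", check_add(&l, 20, now, 7 * 24 * 3600)); /* -1: over hard */
	return 0;
}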
static int dquot_add_space(struct dquot *dquot, qsize_t space,
			   qsize_t rsv_space, unsigned int flags,
			   struct dquot_warn *warn)
{
	qsize_t tspace;
	struct super_block *sb = dquot->dq_sb;
	int ret = 0;

	spin_lock(&dquot->dq_dqb_lock);
	if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
		goto finish;

	tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
		+ space + rsv_space;

	if (dquot->dq_dqb.dqb_bhardlimit &&
	    tspace > dquot->dq_dqb.dqb_bhardlimit &&
	    !ignore_hardlimit(dquot)) {
		if (flags & DQUOT_SPACE_WARN)
			prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
		ret = -EDQUOT;
		goto finish;
	}

	if (dquot->dq_dqb.dqb_bsoftlimit &&
	    tspace > dquot->dq_dqb.dqb_bsoftlimit &&
	    dquot->dq_dqb.dqb_btime &&
	    ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime &&
	    !ignore_hardlimit(dquot)) {
		if (flags & DQUOT_SPACE_WARN)
			prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
		ret = -EDQUOT;
		goto finish;
	}

	if (dquot->dq_dqb.dqb_bsoftlimit &&
	    tspace > dquot->dq_dqb.dqb_bsoftlimit &&
	    dquot->dq_dqb.dqb_btime == 0) {
		if (flags & DQUOT_SPACE_WARN) {
			prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
			dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() +
			    sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace;
		} else {
			/*
			 * We don't allow preallocation to exceed softlimit,
			 * so exceeding will always be reported
			 */
			ret = -EDQUOT;
			goto finish;
		}
	}
finish:
	/*
	 * We have to be careful and go through warning generation & grace time
	 * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it
	 * only here...
	 */
	if (flags & DQUOT_SPACE_NOFAIL)
		ret = 0;
	if (!ret) {
		dquot->dq_dqb.dqb_rsvspace += rsv_space;
		dquot->dq_dqb.dqb_curspace += space;
	}
	spin_unlock(&dquot->dq_dqb_lock);
	return ret;
}

static int info_idq_free(struct dquot *dquot, qsize_t inodes)
{
	qsize_t newinodes;

	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
	    !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type))
		return QUOTA_NL_NOWARN;

	newinodes = dquot->dq_dqb.dqb_curinodes - inodes;
	if (newinodes <= dquot->dq_dqb.dqb_isoftlimit)
		return QUOTA_NL_ISOFTBELOW;
	if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
	    newinodes < dquot->dq_dqb.dqb_ihardlimit)
		return QUOTA_NL_IHARDBELOW;
	return QUOTA_NL_NOWARN;
}

static int info_bdq_free(struct dquot *dquot, qsize_t space)
{
	qsize_t tspace;

	tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace;

	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
	    tspace <= dquot->dq_dqb.dqb_bsoftlimit)
		return QUOTA_NL_NOWARN;

	if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
		return QUOTA_NL_BSOFTBELOW;
	if (tspace >= dquot->dq_dqb.dqb_bhardlimit &&
	    tspace - space < dquot->dq_dqb.dqb_bhardlimit)
		return QUOTA_NL_BHARDBELOW;
	return QUOTA_NL_NOWARN;
}

static int inode_quota_active(const struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (IS_NOQUOTA(inode))
		return 0;
	return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb);
}

/*
 * Initialize quota pointers in inode
 *
 * It is better to call this function outside of any transaction as it
 * might need a lot of space in journal for dquot structure allocation.
 */
static int __dquot_initialize(struct inode *inode, int type)
{
	int cnt, init_needed = 0;
	struct dquot __rcu **dquots;
	struct dquot *got[MAXQUOTAS] = {};
	struct super_block *sb = inode->i_sb;
	qsize_t rsv;
	int ret = 0;

	if (!inode_quota_active(inode))
		return 0;

	dquots = i_dquot(inode);

	/* First get references to structures we might need. */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		struct kqid qid;
		kprojid_t projid;
		int rc;
		struct dquot *dquot;

		if (type != -1 && cnt != type)
			continue;
		/*
		 * The i_dquot should have been initialized in most cases,
		 * we check it without locking here to avoid unnecessary
		 * dqget()/dqput() calls.
		 */
		if (dquots[cnt])
			continue;

		if (!sb_has_quota_active(sb, cnt))
			continue;

		init_needed = 1;

		switch (cnt) {
		case USRQUOTA:
			qid = make_kqid_uid(inode->i_uid);
			break;
		case GRPQUOTA:
			qid = make_kqid_gid(inode->i_gid);
			break;
		case PRJQUOTA:
			rc = inode->i_sb->dq_op->get_projid(inode, &projid);
			if (rc)
				continue;
			qid = make_kqid_projid(projid);
			break;
		}
		dquot = dqget(sb, qid);
		if (IS_ERR(dquot)) {
			/* We raced with somebody turning quotas off... */
			if (PTR_ERR(dquot) != -ESRCH) {
				ret = PTR_ERR(dquot);
				goto out_put;
			}
			dquot = NULL;
		}
		got[cnt] = dquot;
	}

	/* All required i_dquot has been initialized */
	if (!init_needed)
		return 0;

	spin_lock(&dq_data_lock);
	if (IS_NOQUOTA(inode))
		goto out_lock;
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (type != -1 && cnt != type)
			continue;
		/* Avoid races with quotaoff() */
		if (!sb_has_quota_active(sb, cnt))
			continue;
		/* We could race with quotaon or dqget() could have failed */
		if (!got[cnt])
			continue;
		if (!dquots[cnt]) {
			rcu_assign_pointer(dquots[cnt], got[cnt]);
			got[cnt] = NULL;
			/*
			 * Make quota reservation system happy if someone
			 * did a write before quota was turned on
			 */
			rsv = inode_get_rsv_space(inode);
			if (unlikely(rsv)) {
				struct dquot *dquot = srcu_dereference_check(
					dquots[cnt], &dquot_srcu,
					lockdep_is_held(&dq_data_lock));

				spin_lock(&inode->i_lock);
				/* Get reservation again under proper lock */
				rsv = __inode_get_rsv_space(inode);
				spin_lock(&dquot->dq_dqb_lock);
				dquot->dq_dqb.dqb_rsvspace += rsv;
				spin_unlock(&dquot->dq_dqb_lock);
				spin_unlock(&inode->i_lock);
			}
		}
	}
out_lock:
	spin_unlock(&dq_data_lock);
out_put:
	/* Drop unused references */
	dqput_all(got);

	return ret;
}

int dquot_initialize(struct inode *inode)
{
	return __dquot_initialize(inode, -1);
}
EXPORT_SYMBOL(dquot_initialize);

bool dquot_initialize_needed(struct inode *inode)
{
	struct dquot __rcu **dquots;
	int i;

	if (!inode_quota_active(inode))
		return false;

	dquots = i_dquot(inode);
	for (i = 0; i < MAXQUOTAS; i++)
		if (!dquots[i] && sb_has_quota_active(inode->i_sb, i))
			return true;
	return false;
}
EXPORT_SYMBOL(dquot_initialize_needed);

/*
 * Release all quotas referenced by inode.
 *
 * This function should only be called on inode free or when converting a
 * file to a quota file; there are no other users of i_dquot in either case,
 * so we needn't call synchronize_srcu() after clearing i_dquot.
 */
static void __dquot_drop(struct inode *inode)
{
	int cnt;
	struct dquot __rcu **dquots = i_dquot(inode);
	struct dquot *put[MAXQUOTAS];

	spin_lock(&dq_data_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu,
					lockdep_is_held(&dq_data_lock));
		rcu_assign_pointer(dquots[cnt], NULL);
	}
	spin_unlock(&dq_data_lock);
	dqput_all(put);
}

void dquot_drop(struct inode *inode)
{
	struct dquot __rcu * const *dquots;
	int cnt;

	if (IS_NOQUOTA(inode))
		return;

	/*
	 * Test before calling to rule out calls from proc and such
	 * where we are not allowed to block. Note that this is
	 * actually reliable test even without the lock - the caller
	 * must assure that nobody can come after the DQUOT_DROP and
	 * add quota pointers back anyway.
	 */
	dquots = i_dquot(inode);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (dquots[cnt])
			break;
	}

	if (cnt < MAXQUOTAS)
		__dquot_drop(inode);
}
EXPORT_SYMBOL(dquot_drop);

/*
 * inode_reserved_space is managed internally by quota, and protected by
 * i_lock similar to i_blocks+i_bytes.
 */
static qsize_t *inode_reserved_space(struct inode *inode)
{
	/* Filesystem must explicitly define its own method in order to use
	 * quota reservation interface */
	BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
	return inode->i_sb->dq_op->get_reserved_space(inode);
}

static qsize_t __inode_get_rsv_space(struct inode *inode)
{
	if (!inode->i_sb->dq_op->get_reserved_space)
		return 0;
	return *inode_reserved_space(inode);
}

static qsize_t inode_get_rsv_space(struct inode *inode)
{
	qsize_t ret;

	if (!inode->i_sb->dq_op->get_reserved_space)
		return 0;
	spin_lock(&inode->i_lock);
	ret = __inode_get_rsv_space(inode);
	spin_unlock(&inode->i_lock);
	return ret;
}

/*
 * This function updates i_blocks+i_bytes fields and quota information
 * (together with appropriate checks).
 *
 * NOTE: We absolutely rely on the fact that caller dirties the inode
 * (usually helpers in quotaops.h care about this) and holds a handle for
 * the current transaction so that dquot write and inode write go into the
 * same transaction.
 */

/*
 * This operation can block, but only after everything is updated
 */
int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
	int cnt, ret = 0, index;
	struct dquot_warn warn[MAXQUOTAS];
	int reserve = flags & DQUOT_SPACE_RESERVE;
	struct dquot __rcu **dquots;
	struct dquot *dquot;

	if (!inode_quota_active(inode)) {
		if (reserve) {
			spin_lock(&inode->i_lock);
			*inode_reserved_space(inode) += number;
			spin_unlock(&inode->i_lock);
		} else {
			inode_add_bytes(inode, number);
		}
		goto out;
	}

	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		warn[cnt].w_type = QUOTA_NL_NOWARN;

	dquots = i_dquot(inode);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&inode->i_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
		if (!dquot)
			continue;
		if (reserve) {
			ret = dquot_add_space(dquot, 0, number, flags,
					      &warn[cnt]);
		} else {
			ret = dquot_add_space(dquot, number, 0, flags,
					      &warn[cnt]);
		}
		if (ret) {
			/* Back out changes we already did */
			for (cnt--; cnt >= 0; cnt--) {
				dquot = srcu_dereference(dquots[cnt],
							 &dquot_srcu);
				if (!dquot)
					continue;
				spin_lock(&dquot->dq_dqb_lock);
				if (reserve)
					dquot_free_reserved_space(dquot,
								  number);
				else
					dquot_decr_space(dquot, number);
				spin_unlock(&dquot->dq_dqb_lock);
			}
			spin_unlock(&inode->i_lock);
			goto out_flush_warn;
		}
	}
	if (reserve)
		*inode_reserved_space(inode) += number;
	else
		__inode_add_bytes(inode, number);
	spin_unlock(&inode->i_lock);

	if (reserve)
		goto out_flush_warn;
	ret = mark_all_dquot_dirty(dquots);
out_flush_warn:
	srcu_read_unlock(&dquot_srcu, index);
	flush_warnings(warn);
out:
	return ret;
}
EXPORT_SYMBOL(__dquot_alloc_space);

/*
 * This operation can block, but only after everything is updated
 */
int dquot_alloc_inode(struct inode *inode)
{
	int cnt, ret = 0, index;
	struct dquot_warn warn[MAXQUOTAS];
	struct dquot __rcu * const *dquots;
	struct dquot *dquot;

	if (!inode_quota_active(inode))
		return 0;

	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		warn[cnt].w_type = QUOTA_NL_NOWARN;

	dquots = i_dquot(inode);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&inode->i_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
		if (!dquot)
			continue;
		ret = dquot_add_inodes(dquot, 1, &warn[cnt]);
		if (ret) {
			for (cnt--; cnt >= 0; cnt--) {
				dquot = srcu_dereference(dquots[cnt],
							 &dquot_srcu);
				if (!dquot)
					continue;
				/* Back out changes we already did */
				spin_lock(&dquot->dq_dqb_lock);
				dquot_decr_inodes(dquot, 1);
				spin_unlock(&dquot->dq_dqb_lock);
			}
			goto warn_put_all;
		}
	}

warn_put_all:
	spin_unlock(&inode->i_lock);
	if (ret == 0)
= mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } EXPORT_SYMBOL(dquot_alloc_inode); /* * Convert in-memory reserved quotas to real consumed quotas */ void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; dquot->dq_dqb.dqb_curspace += number; dquot->dq_dqb.dqb_rsvspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); } EXPORT_SYMBOL(dquot_claim_space_nodirty); /* * Convert allocated space back to in-memory reserved quotas */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; dquot->dq_dqb.dqb_rsvspace += number; dquot->dq_dqb.dqb_curspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); /* * This operation can block, but only after everything is updated */ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu **dquots; struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; spin_unlock(&inode->i_lock); } else { inode_sub_bytes(inode, number); } return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; else __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_unlock; mark_all_dquot_dirty(dquots); out_unlock: 
srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; int index; if (!inode_quota_active(inode)) return; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(dquot_free_inode); /* * Transfer the number of inode and blocks from one diskquota to an other. * On success, dquot references in transfer_to are consumed and references * to original dquots that need to be released are placed there. On failure, * references are kept untouched. * * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * * We are holding reference on transfer_from & transfer_to, no need to * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, index, ret = 0, err; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; if (IS_NOQUOTA(inode)) return 0; if (inode->i_sb->dq_op->get_inode_usage) { ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage); if (ret) return ret; } /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { warn_to[cnt].w_type = QUOTA_NL_NOWARN; warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } spin_lock(&dq_data_lock); spin_lock(&inode->i_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); return 0; } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* * Skip changes for same uid or gid or for turned off quota-type. 
*/ if (!transfer_to[cnt]) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; transfer_from[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) goto over_quota; ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); goto over_quota; } } /* Decrease usage for source structures and update quota pointers */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!is_valid[cnt]) continue; /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { int wtype; spin_lock(&transfer_from[cnt]->dq_dqb_lock); wtype = info_idq_free(transfer_from[cnt], inode_usage); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_inodes[cnt], transfer_from[cnt], wtype); wtype = info_bdq_free(transfer_from[cnt], cur_space + rsv_space); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_space[cnt], transfer_from[cnt], wtype); dquot_decr_inodes(transfer_from[cnt], inode_usage); dquot_decr_space(transfer_from[cnt], cur_space); dquot_free_reserved_space(transfer_from[cnt], rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); /* * These arrays are local and we hold dquot references so we don't need * the srcu protection but still take dquot_srcu to avoid warning in * mark_all_dquot_dirty(). */ index = srcu_read_lock(&dquot_srcu); err = mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); if (err < 0) ret = err; err = mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); if (err < 0) ret = err; srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); /* Pass back references to put */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (is_valid[cnt]) transfer_to[cnt] = transfer_from[cnt]; return ret; over_quota: /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { if (!is_valid[cnt]) continue; spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); dquot_decr_space(transfer_to[cnt], cur_space); dquot_free_reserved_space(transfer_to[cnt], rsv_space); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); flush_warnings(warn_to); return ret; } EXPORT_SYMBOL(__dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { struct dquot *transfer_to[MAXQUOTAS] = {}; struct dquot *dquot; struct super_block *sb = inode->i_sb; int ret; if (!inode_quota_active(inode)) return 0; if (i_uid_needs_update(idmap, iattr, inode)) { kuid_t kuid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); dquot = dqget(sb, make_kqid_uid(kuid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[USRQUOTA] = dquot; } if (i_gid_needs_update(idmap, iattr, inode)) { kgid_t kgid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); dquot = dqget(sb, make_kqid_gid(kgid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; 
} dquot = NULL; } transfer_to[GRPQUOTA] = dquot; } ret = __dquot_transfer(inode, transfer_to); out_put: dqput_all(transfer_to); return ret; } EXPORT_SYMBOL(dquot_transfer); /* * Write info of quota file to disk */ int dquot_commit_info(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); return dqopt->ops[type]->write_file_info(sb, type); } EXPORT_SYMBOL(dquot_commit_info); int dquot_get_next_id(struct super_block *sb, struct kqid *qid) { struct quota_info *dqopt = sb_dqopt(sb); if (!sb_has_quota_active(sb, qid->type)) return -ESRCH; if (!dqopt->ops[qid->type]->get_next_id) return -ENOSYS; return dqopt->ops[qid->type]->get_next_id(sb, qid); } EXPORT_SYMBOL(dquot_get_next_id); /* * Definitions of diskquota operations. */ const struct dquot_operations dquot_operations = { .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, .mark_dirty = dquot_mark_dquot_dirty, .write_info = dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_next_id = dquot_get_next_id, }; EXPORT_SYMBOL(dquot_operations); /* * Generic helper for ->open on filesystems supporting disk quotas. */ int dquot_file_open(struct inode *inode, struct file *file) { int error; error = generic_file_open(inode, file); if (!error && (file->f_mode & FMODE_WRITE)) error = dquot_initialize(inode); return error; } EXPORT_SYMBOL(dquot_file_open); static void vfs_cleanup_quota_inode(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); struct inode *inode = dqopt->files[type]; if (!inode) return; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { inode_lock(inode); inode->i_flags &= ~S_NOQUOTA; inode_unlock(inode); } dqopt->files[type] = NULL; iput(inode); } /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ int dquot_disable(struct super_block *sb, int type, unsigned int flags) { int cnt; struct quota_info *dqopt = sb_dqopt(sb); rwsem_assert_held_write(&sb->s_umount); /* Cannot turn off usage accounting without turning off limits, or * suspend quotas and simultaneously turn quotas off. */ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED))) return -EINVAL; /* * Skip everything if there's nothing to do. We have to do this because * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. */ if (!sb_any_quota_loaded(sb)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_loaded(sb, cnt)) continue; if (flags & DQUOT_SUSPENDED) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); } else { spin_lock(&dq_state_lock); dqopt->flags &= ~dquot_state_flag(flags, cnt); /* Turning off suspended quotas? */ if (!sb_has_quota_loaded(sb, cnt) && sb_has_quota_suspended(sb, cnt)) { dqopt->flags &= ~dquot_state_flag( DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); vfs_cleanup_quota_inode(sb, cnt); continue; } spin_unlock(&dq_state_lock); } /* We still have to keep quota loaded? */ if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED)) continue; /* Note: these are blocking operations */ drop_dquot_ref(sb, cnt); invalidate_dquots(sb, cnt); /* * Now all dquots should be invalidated, all writes done so we * should be only users of the info. No locks needed. 
*/ if (info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } /* Skip syncing and setting flags if quota files are hidden */ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) goto put_inodes; /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). */ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); /* Now the quota files are just ordinary files and we can set the * inode flags back. Moreover we discard the pagecache so that * userspace sees the writes we did bypassing the pagecache. We * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt) && dqopt->files[cnt]) { inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); put_inodes: /* We are done when suspending quotas */ if (flags & DQUOT_SUSPENDED) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt)) vfs_cleanup_quota_inode(sb, cnt); return 0; } EXPORT_SYMBOL(dquot_disable); int dquot_quota_off(struct super_block *sb, int type) { return dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); } EXPORT_SYMBOL(dquot_quota_off); /* * Turn quotas on on a device */ static int vfs_setup_quota_inode(struct inode *inode, int type) { struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); if (is_bad_inode(inode)) return -EUCLEAN; if (!S_ISREG(inode->i_mode)) return -EACCES; if (IS_RDONLY(inode)) return -EROFS; if (sb_has_quota_loaded(sb, type)) return -EBUSY; /* * Quota files should never be encrypted. They should be thought of as * filesystem metadata, not user data. New-style internal quota files * cannot be encrypted by users anyway, but old-style external quota * files could potentially be incorrectly created in an encrypted * directory, hence this explicit check. Some reasons why encrypted * quota files don't work include: (1) some filesystems that support * encryption don't handle it in their quota_read and quota_write, and * (2) cleaning up encrypted quota files at unmount would need special * consideration, as quota files are cleaned up later than user files. */ if (IS_ENCRYPTED(inode)) return -EINVAL; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) return -EIO; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* We don't want quota and atime on quota files (deadlocks * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more * references can be added */ __dquot_drop(inode); } return 0; } int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags) { struct quota_format_type *fmt; struct quota_info *dqopt = sb_dqopt(sb); int error; lockdep_assert_held_write(&sb->s_umount); /* Just unsuspend quotas? 
*/ if (WARN_ON_ONCE(flags & DQUOT_SUSPENDED)) return -EINVAL; fmt = find_quota_format(format_id); if (!fmt) return -ESRCH; if (!sb->dq_op || !sb->s_qcop || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; } /* Filesystems outside of init_user_ns not yet supported */ if (sb->s_user_ns != &init_user_ns) { error = -EINVAL; goto out_fmt; } /* Usage always has to be set... */ if (!(flags & DQUOT_USAGE_ENABLED)) { error = -EINVAL; goto out_fmt; } if (sb_has_quota_loaded(sb, type)) { error = -EBUSY; goto out_fmt; } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* As we bypass the pagecache we must now flush all the * dirty data and invalidate caches so that kernel sees * changes from userspace. It is not enough to just flush * the quota file since if blocksize < pagesize, invalidation * of the cache could fail because of other unrelated dirty * data */ sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_fmt; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; dqopt->info[type].dqi_fmt_id = format_id; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); error = dqopt->ops[type]->read_file_info(sb, type); if (error < 0) goto out_fmt; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; spin_unlock(&dq_data_lock); } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); error = add_dquot_ref(sb, type); if (error) dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; out_fmt: put_quota_format(fmt); return error; } EXPORT_SYMBOL(dquot_load_quota_sb); /* * More powerful function for turning on quotas on given quota inode allowing * setting of individual quota flags */ int dquot_load_quota_inode(struct inode *inode, int type, int format_id, unsigned int flags) { int err; err = vfs_setup_quota_inode(inode, type); if (err < 0) return err; err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags); if (err < 0) vfs_cleanup_quota_inode(inode->i_sb, type); return err; } EXPORT_SYMBOL(dquot_load_quota_inode); /* Reenable quotas on remount RW */ int dquot_resume(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int ret = 0, cnt; unsigned int flags; rwsem_assert_held_write(&sb->s_umount); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_suspended(sb, cnt)) continue; spin_lock(&dq_state_lock); flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, cnt); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); spin_unlock(&dq_state_lock); flags = dquot_generic_flag(flags, cnt); ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id, flags); if (ret < 0) vfs_cleanup_quota_inode(sb, cnt); } return ret; } EXPORT_SYMBOL(dquot_resume); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { int error = security_quota_on(path->dentry); if (error) return error; /* Quota file not on the same filesystem? */ if (path->dentry->d_sb != sb) error = -EXDEV; else error = dquot_load_quota_inode(d_inode(path->dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; } EXPORT_SYMBOL(dquot_quota_on); /* * This function is used when filesystem needs to initialize quotas * during mount time. 
*/ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type) { struct dentry *dentry; int error; dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name)); if (IS_ERR(dentry)) return PTR_ERR(dentry); error = security_quota_on(dentry); if (!error) error = dquot_load_quota_inode(d_inode(dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); dput(dentry); return error; } EXPORT_SYMBOL(dquot_quota_on_mount); static int dquot_quota_enable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* Accounting cannot be turned on while fs is mounted */ flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT); if (!flags) return -EINVAL; for (type = 0; type < MAXQUOTAS; type++) { if (!(flags & qtype_enforce_flag(type))) continue; /* Can't enforce without accounting */ if (!sb_has_quota_usage_enabled(sb, type)) { ret = -EINVAL; goto out_err; } if (sb_has_quota_limits_enabled(sb, type)) { /* compatible with XFS */ ret = -EEXIST; goto out_err; } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } return 0; out_err: /* Backout enforcement enablement we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); } return ret; } static int dquot_quota_disable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* * We don't support turning off accounting via quotactl. In principle * quota infrastructure can do this but filesystems don't expect * userspace to be able to do it. */ if (flags & (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT)) return -EOPNOTSUPP; /* Filter out limits not enabled */ for (type = 0; type < MAXQUOTAS; type++) if (!sb_has_quota_limits_enabled(sb, type)) flags &= ~qtype_enforce_flag(type); /* Nothing left? 
*/ if (!flags) return -EEXIST; for (type = 0; type < MAXQUOTAS; type++) { if (flags & qtype_enforce_flag(type)) { ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); if (ret < 0) goto out_err; } } return 0; out_err: /* Backout enforcement disabling we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } } return ret; } /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); spin_lock(&dquot->dq_dqb_lock); di->d_spc_hardlimit = dm->dqb_bhardlimit; di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_softlimit = dm->dqb_isoftlimit; di->d_space = dm->dqb_curspace + dm->dqb_rsvspace; di->d_ino_count = dm->dqb_curinodes; di->d_spc_timer = dm->dqb_btime; di->d_ino_timer = dm->dqb_itime; spin_unlock(&dquot->dq_dqb_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; dquot = dqget(sb, qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_dqblk); int dquot_get_next_dqblk(struct super_block *sb, struct kqid *qid, struct qc_dqblk *di) { struct dquot *dquot; int err; if (!sb->dq_op->get_next_id) return -ENOSYS; err = sb->dq_op->get_next_id(sb, qid); if (err < 0) return err; dquot = dqget(sb, *qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_next_dqblk); #define VFS_QC_MASK \ (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ QC_SPC_TIMER | QC_INO_TIMER) /* Generic routine for setting common part of quota structure */ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; int ret; if (di->d_fieldmask & ~VFS_QC_MASK) return -EINVAL; if (((di->d_fieldmask & QC_SPC_SOFT) && di->d_spc_softlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_SPC_HARD) && di->d_spc_hardlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_INO_SOFT) && (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) || ((di->d_fieldmask & QC_INO_HARD) && (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) return -ERANGE; spin_lock(&dquot->dq_dqb_lock); if (di->d_fieldmask & QC_SPACE) { dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_SOFT) dm->dqb_bsoftlimit = di->d_spc_softlimit; if (di->d_fieldmask & QC_SPC_HARD) dm->dqb_bhardlimit = di->d_spc_hardlimit; if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_COUNT) { dm->dqb_curinodes = di->d_ino_count; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_SOFT) dm->dqb_isoftlimit = di->d_ino_softlimit; if (di->d_fieldmask & QC_INO_HARD) dm->dqb_ihardlimit = di->d_ino_hardlimit; if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_TIMER) { dm->dqb_btime = di->d_spc_timer; check_blim = 1; 
set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_TIMER) { dm->dqb_itime = di->d_ino_timer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } if (check_blim) { if (!dm->dqb_bsoftlimit || dm->dqb_curspace + dm->dqb_rsvspace <= dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = ktime_get_real_seconds() + dqi->dqi_bgrace; } if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes <= dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_INO_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = ktime_get_real_seconds() + dqi->dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); spin_unlock(&dquot->dq_dqb_lock); ret = mark_dquot_dirty(dquot); if (ret < 0) return ret; return 0; } int dquot_set_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; int rc; dquot = dqget(sb, qid); if (IS_ERR(dquot)) { rc = PTR_ERR(dquot); goto out; } rc = do_set_dqblk(dquot, di); dqput(dquot); out: return rc; } EXPORT_SYMBOL(dquot_set_dqblk); /* Generic routine for getting common part of quota file information */ int dquot_get_state(struct super_block *sb, struct qc_state *state) { struct mem_dqinfo *mi; struct qc_type_state *tstate; struct quota_info *dqopt = sb_dqopt(sb); int type; memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) continue; tstate = state->s_state + type; mi = sb_dqopt(sb)->info + type; tstate->flags = QCI_ACCT_ENABLED; spin_lock(&dq_data_lock); if (mi->dqi_flags & DQF_SYS_FILE) tstate->flags |= QCI_SYSFILE; if (mi->dqi_flags & DQF_ROOT_SQUASH) tstate->flags |= QCI_ROOT_SQUASH; if (sb_has_quota_limits_enabled(sb, type)) tstate->flags |= QCI_LIMITS_ENFORCED; tstate->spc_timelimit = mi->dqi_bgrace; tstate->ino_timelimit = mi->dqi_igrace; if (dqopt->files[type]) { tstate->ino = dqopt->files[type]->i_ino; tstate->blocks = dqopt->files[type]->i_blocks; } tstate->nextents = 1; /* We don't know... 
*/ spin_unlock(&dq_data_lock); } return 0; } EXPORT_SYMBOL(dquot_get_state); /* Generic routine for setting common part of quota file information */ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) { struct mem_dqinfo *mi; if ((ii->i_fieldmask & QC_WARNS_MASK) || (ii->i_fieldmask & QC_RT_SPC_TIMER)) return -EINVAL; if (!sb_has_quota_active(sb, type)) return -ESRCH; mi = sb_dqopt(sb)->info + type; if (ii->i_fieldmask & QC_FLAGS) { if ((ii->i_flags & QCI_ROOT_SQUASH && mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) return -EINVAL; } spin_lock(&dq_data_lock); if (ii->i_fieldmask & QC_SPC_TIMER) mi->dqi_bgrace = ii->i_spc_timelimit; if (ii->i_fieldmask & QC_INO_TIMER) mi->dqi_igrace = ii->i_ino_timelimit; if (ii->i_fieldmask & QC_FLAGS) { if (ii->i_flags & QCI_ROOT_SQUASH) mi->dqi_flags |= DQF_ROOT_SQUASH; else mi->dqi_flags &= ~DQF_ROOT_SQUASH; } spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ return sb->dq_op->write_info(sb, type); } EXPORT_SYMBOL(dquot_set_dqinfo); const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .get_nextdqblk = dquot_get_next_dqblk, .set_dqblk = dquot_set_dqblk }; EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int type = (unsigned long *)table->data - dqstats.stat; s64 value = percpu_counter_sum(&dqstats.counter[type]); /* Filter negative values for non-monotonic counters */ if (value < 0 && (type == DQST_ALLOC_DQUOTS || type == DQST_FREE_DQUOTS)) value = 0; /* Update global table */ dqstats.stat[type] = value; return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static const struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "drops", .data = &dqstats.stat[DQST_DROPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "reads", .data = &dqstats.stat[DQST_READS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "writes", .data = &dqstats.stat[DQST_WRITES], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", .data = &dqstats.stat[DQST_CACHE_HITS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", .data = &dqstats.stat[DQST_ALLOC_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", .data = &dqstats.stat[DQST_FREE_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "syncs", .data = &dqstats.stat[DQST_SYNCS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, #ifdef CONFIG_PRINT_QUOTA_WARNING { .procname = "warnings", .data = &flag_print_warnings, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif }; static int __init dquot_init(void) { int i, ret; unsigned long nr_hash, order; struct shrinker *dqcache_shrinker; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); register_sysctl_init("fs/quota", fs_dqstats_table); dquot_cachep = 
kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_PANIC), NULL); order = 0; dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order); if (!dquot_hash) panic("Cannot create dquot hash table"); ret = percpu_counter_init_many(dqstats.counter, 0, GFP_KERNEL, _DQST_DQSTAT_LAST); if (ret) panic("Cannot create dquot stat counters"); /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); dq_hash_bits = ilog2(nr_hash); nr_hash = 1UL << dq_hash_bits; dq_hash_mask = nr_hash - 1; for (i = 0; i < nr_hash; i++) INIT_HLIST_HEAD(dquot_hash + i); pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); dqcache_shrinker = shrinker_alloc(0, "dquota-cache"); if (!dqcache_shrinker) panic("Cannot allocate dquot shrinker"); dqcache_shrinker->count_objects = dqcache_shrink_count; dqcache_shrinker->scan_objects = dqcache_shrink_scan; shrinker_register(dqcache_shrinker); return 0; } fs_initcall(dquot_init);
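/*
 * Illustrative aside (not part of the kernel sources): a minimal
 * userspace sketch of the charge/roll-back pattern that
 * __dquot_alloc_space() and dquot_alloc_inode() use above. Every
 * enabled quota type must accept the charge; a failure on type N
 * backs out the charges already applied to types 0..N-1, so the
 * allocation stays all-or-nothing. The toy_quota type and its limits
 * are hypothetical stand-ins for the kernel's dquot structures.
 */
#include <stdio.h>

#define MAXTYPES 3

struct toy_quota {
	long long cur;		/* currently charged space, in bytes */
	long long hard;		/* hard limit; 0 means unlimited */
};

/* Charge @n bytes against one quota, failing if the hard limit trips. */
static int toy_charge(struct toy_quota *q, long long n)
{
	if (q->hard && q->cur + n > q->hard)
		return -1;
	q->cur += n;
	return 0;
}

/* Mirrors the loop shape of __dquot_alloc_space(). */
static int toy_alloc_space(struct toy_quota q[MAXTYPES], long long n)
{
	int cnt;

	for (cnt = 0; cnt < MAXTYPES; cnt++) {
		if (toy_charge(&q[cnt], n)) {
			/* Back out changes we already did. */
			for (cnt--; cnt >= 0; cnt--)
				q[cnt].cur -= n;
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct toy_quota q[MAXTYPES] = { { 0, 100 }, { 0, 50 }, { 0, 0 } };

	printf("first charge:  %d\n", toy_alloc_space(q, 40));	/* 0: fits */
	printf("second charge: %d\n", toy_alloc_space(q, 40));	/* -1: type 1 trips */
	printf("type 0 after rollback: %lld\n", q[0].cur);	/* still 40 */
	return 0;
}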
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RCULIST_NULLS_H
#define _LINUX_RCULIST_NULLS_H

#ifdef __KERNEL__

/*
 * RCU-protected list version
 */
#include <linux/list_nulls.h>
#include <linux/rcupdate.h>

/**
 * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
 * @n: the element to delete from the hash list.
 *
 * Note: hlist_nulls_unhashed() on the node returns true after this. It is
 * useful for RCU based read lockfree traversal if the writer side
 * must know if the list entry is still hashed or already unhashed.
 *
 * In particular, it means that we can not poison the forward pointers
 * that may still be used for walking the hash list and we can only
 * zero the pprev pointer so list_unhashed() will return true after
 * this.
 *
 * The caller must take whatever precautions are necessary (such as
 * holding appropriate locks) to avoid racing with another
 * list-mutation primitive, such as hlist_nulls_add_head_rcu() or
 * hlist_nulls_del_rcu(), running on this same list. However, it is
 * perfectly legal to run concurrently with the _rcu list-traversal
 * primitives, such as hlist_nulls_for_each_entry_rcu().
 */
static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
{
	if (!hlist_nulls_unhashed(n)) {
		__hlist_nulls_del(n);
		WRITE_ONCE(n->pprev, NULL);
	}
}

/**
 * hlist_nulls_first_rcu - returns the first element of the hash list.
 * @head: the head of the list.
 */
#define hlist_nulls_first_rcu(head) \
	(*((struct hlist_nulls_node __rcu __force **)&(head)->first))

/**
 * hlist_nulls_next_rcu - returns the element of the list after @node.
 * @node: element of the list.
 */
#define hlist_nulls_next_rcu(node) \
	(*((struct hlist_nulls_node __rcu __force **)&(node)->next))

/**
 * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
 * @n: the element to delete from the hash list.
 *
 * Note: hlist_nulls_unhashed() on entry does not return true after this,
 * the entry is in an undefined state. It is useful for RCU based
 * lockfree traversal.
 *
 * In particular, it means that we can not poison the forward
 * pointers that may still be used for walking the hash list.
 *
 * The caller must take whatever precautions are necessary
 * (such as holding appropriate locks) to avoid racing
 * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
 * or hlist_nulls_del_rcu(), running on this same list.
 * However, it is perfectly legal to run concurrently with
 * the _rcu list-traversal primitives, such as
 * hlist_nulls_for_each_entry().
 */
static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
{
	__hlist_nulls_del(n);
	WRITE_ONCE(n->pprev, LIST_POISON2);
}

/**
 * hlist_nulls_add_head_rcu
 * @n: the element to add to the hash list.
 * @h: the list to add to.
 *
 * Description:
 * Adds the specified element to the specified hlist_nulls,
 * while permitting racing traversals.
 *
 * The caller must take whatever precautions are necessary
 * (such as holding appropriate locks) to avoid racing
 * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
 * or hlist_nulls_del_rcu(), running on this same list.
 * However, it is perfectly legal to run concurrently with
 * the _rcu list-traversal primitives, such as
 * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
 * problems on Alpha CPUs. Regardless of the type of CPU, the
 * list-traversal primitive must be guarded by rcu_read_lock().
 */
static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
					    struct hlist_nulls_head *h)
{
	struct hlist_nulls_node *first = h->first;

	WRITE_ONCE(n->next, first);
	WRITE_ONCE(n->pprev, &h->first);
	rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
	if (!is_a_nulls(first))
		WRITE_ONCE(first->pprev, &n->next);
}

/**
 * hlist_nulls_add_tail_rcu
 * @n: the element to add to the hash list.
 * @h: the list to add to.
 *
 * Description:
 * Adds the specified element to the specified hlist_nulls,
 * while permitting racing traversals.
 *
 * The caller must take whatever precautions are necessary
 * (such as holding appropriate locks) to avoid racing
 * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
 * or hlist_nulls_del_rcu(), running on this same list.
 * However, it is perfectly legal to run concurrently with
 * the _rcu list-traversal primitives, such as
 * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
 * problems on Alpha CPUs. Regardless of the type of CPU, the
 * list-traversal primitive must be guarded by rcu_read_lock().
 */
static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
					    struct hlist_nulls_head *h)
{
	struct hlist_nulls_node *i, *last = NULL;

	/* Note: write side code, so rcu accessors are not needed. */
	for (i = h->first; !is_a_nulls(i); i = i->next)
		last = i;

	if (last) {
		WRITE_ONCE(n->next, last->next);
		n->pprev = &last->next;
		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
	} else {
		hlist_nulls_add_head_rcu(n, h);
	}
}

/* after that hlist_nulls_del will work */
static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
{
	n->pprev = &n->next;
	n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL);
}

/**
 * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
 * @tpos: the type * to use as a loop cursor.
 * @pos: the &struct hlist_nulls_node to use as a loop cursor.
 * @head: the head of the list.
 * @member: the name of the hlist_nulls_node within the struct.
 *
 * The barrier() is needed to make sure the compiler doesn't cache the
 * first element [1], as this loop can be restarted [2].
 * [1] Documentation/memory-barriers.txt around line 1533
 * [2] Documentation/RCU/rculist_nulls.rst around line 146
 */
#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member)		\
	for (({barrier();}),						\
	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));	\
	     (!is_a_nulls(pos)) &&					\
	     ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
	     pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))

/**
 * hlist_nulls_for_each_entry_safe -
 *   iterate over list of given type safe against removal of list entry
 * @tpos: the type * to use as a loop cursor.
 * @pos: the &struct hlist_nulls_node to use as a loop cursor.
 * @head: the head of the list.
 * @member: the name of the hlist_nulls_node within the struct.
 */
#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)	\
	for (({barrier();}),						\
	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));	\
	     (!is_a_nulls(pos)) &&					\
	     ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);	\
		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)

#endif
#endif
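/*
 * Illustrative aside (not part of the kernel sources): a userspace toy
 * showing why the "nulls" end marker documented above matters for
 * lockless traversal. Each chain ends in a distinct nulls value that
 * encodes the bucket number, so a reader whose cursor was dragged onto
 * another chain by a concurrent move can notice that it finished on
 * the wrong chain and restart, instead of silently missing its key.
 * All names below are made up; this is not the hlist_nulls API.
 */
#include <stdio.h>
#include <stdint.h>

#define NULLS_MARK(bucket) ((struct toy_node *)(((uintptr_t)(bucket) << 1) | 1))
#define IS_NULLS(p)	((uintptr_t)(p) & 1)
#define NULLS_VAL(p)	((uintptr_t)(p) >> 1)

struct toy_node {
	int key;
	struct toy_node *next;	/* a real node, or a nulls marker */
};

static struct toy_node *toy_lookup(struct toy_node *heads[],
				   unsigned int bucket, int key)
{
	struct toy_node *n;

restart:
	n = heads[bucket];
	while (!IS_NULLS(n)) {
		if (n->key == key)
			return n;
		n = n->next;
	}
	/*
	 * We hit an end marker. If it encodes a different bucket, the
	 * tail of our walk was moved to another chain mid-traversal
	 * (cannot happen in this single-threaded toy); restart.
	 */
	if (NULLS_VAL(n) != bucket)
		goto restart;
	return NULL;
}

int main(void)
{
	struct toy_node b = { 20, NULLS_MARK(0) };
	struct toy_node a = { 10, &b };
	struct toy_node *heads[2] = { &a, NULLS_MARK(1) };

	printf("20: %s\n", toy_lookup(heads, 0, 20) ? "found" : "missing");
	printf("30: %s\n", toy_lookup(heads, 0, 30) ? "found" : "missing");
	return 0;
}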
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2013 Red Hat, Inc.
 * All Rights Reserved.
*/ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_remote.h" #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_health.h" #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ /* * Remote Attribute Values * ======================= * * Remote extended attribute values are conceptually simple -- they're written * to data blocks mapped by an inode's attribute fork, and they have an upper * size limit of 64k. Setting a value does not involve the XFS log. * * However, on a v5 filesystem, maximally sized remote attr values require one * block more than 64k worth of space to hold both the remote attribute value * header (64 bytes). On a 4k block filesystem this results in a 68k buffer; * on a 64k block filesystem, this would be a 128k buffer. Note that the log * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k). * Therefore, we /must/ ensure that remote attribute value buffers never touch * the logging system and therefore never have a log item. */ /* How many bytes can be stored in a remote value buffer? */ inline unsigned int xfs_attr3_rmt_buf_space( struct xfs_mount *mp) { unsigned int blocksize = mp->m_attr_geo->blksize; if (xfs_has_crc(mp)) return blocksize - sizeof(struct xfs_attr3_rmt_hdr); return blocksize; } /* Compute number of fsblocks needed to store a remote attr value */ unsigned int xfs_attr3_rmt_blocks( struct xfs_mount *mp, unsigned int attrlen) { /* * Each contiguous block has a header, so it is not just a simple * attribute length to FSB conversion. */ if (xfs_has_crc(mp)) return howmany(attrlen, xfs_attr3_rmt_buf_space(mp)); return XFS_B_TO_FSB(mp, attrlen); } /* * Checking of the remote attribute header is split into two parts. The verifier * does CRC, location and bounds checking, the unpacking function checks the * attribute parameters and owner. 
*/ static xfs_failaddr_t xfs_attr3_rmt_hdr_ok( void *ptr, xfs_ino_t ino, uint32_t offset, uint32_t size, xfs_daddr_t bno) { struct xfs_attr3_rmt_hdr *rmt = ptr; if (bno != be64_to_cpu(rmt->rm_blkno)) return __this_address; if (offset != be32_to_cpu(rmt->rm_offset)) return __this_address; if (size != be32_to_cpu(rmt->rm_bytes)) return __this_address; if (ino != be64_to_cpu(rmt->rm_owner)) return __this_address; /* ok */ return NULL; } static xfs_failaddr_t xfs_attr3_rmt_verify( struct xfs_mount *mp, struct xfs_buf *bp, void *ptr, xfs_daddr_t bno) { struct xfs_attr3_rmt_hdr *rmt = ptr; if (!xfs_verify_magic(bp, rmt->rm_magic)) return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (be64_to_cpu(rmt->rm_blkno) != bno) return __this_address; if (be32_to_cpu(rmt->rm_bytes) > mp->m_attr_geo->blksize - sizeof(*rmt)) return __this_address; if (be32_to_cpu(rmt->rm_offset) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) return __this_address; if (rmt->rm_owner == 0) return __this_address; return NULL; } static int __xfs_attr3_rmt_read_verify( struct xfs_buf *bp, bool check_crc, xfs_failaddr_t *failaddr) { struct xfs_mount *mp = bp->b_mount; char *ptr; unsigned int len; xfs_daddr_t bno; unsigned int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_has_crc(mp)) return 0; ptr = bp->b_addr; bno = xfs_buf_daddr(bp); len = BBTOB(bp->b_length); ASSERT(len >= blksize); while (len > 0) { if (check_crc && !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { *failaddr = __this_address; return -EFSBADCRC; } *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, bno); if (*failaddr) return -EFSCORRUPTED; len -= blksize; ptr += blksize; bno += BTOBB(blksize); } if (len != 0) { *failaddr = __this_address; return -EFSCORRUPTED; } return 0; } static void xfs_attr3_rmt_read_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; int error; error = __xfs_attr3_rmt_read_verify(bp, true, &fa); if (error) xfs_verifier_error(bp, error, fa); } static xfs_failaddr_t xfs_attr3_rmt_verify_struct( struct xfs_buf *bp) { xfs_failaddr_t fa; int error; error = __xfs_attr3_rmt_read_verify(bp, false, &fa); return error ? fa : NULL; } static void xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; unsigned int blksize = mp->m_attr_geo->blksize; char *ptr; int len; xfs_daddr_t bno; /* no verification of non-crc buffers */ if (!xfs_has_crc(mp)) return; ptr = bp->b_addr; bno = xfs_buf_daddr(bp); len = BBTOB(bp->b_length); ASSERT(len >= blksize); while (len > 0) { struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; fa = xfs_attr3_rmt_verify(mp, bp, ptr, bno); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } /* * Ensure we aren't writing bogus LSNs to disk. See * xfs_attr3_rmt_hdr_set() for the explanation. 
*/ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); len -= blksize; ptr += blksize; bno += BTOBB(blksize); } if (len != 0) xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { .name = "xfs_attr3_rmt", .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) }, .verify_read = xfs_attr3_rmt_read_verify, .verify_write = xfs_attr3_rmt_write_verify, .verify_struct = xfs_attr3_rmt_verify_struct, }; STATIC int xfs_attr3_rmt_hdr_set( struct xfs_mount *mp, void *ptr, xfs_ino_t ino, uint32_t offset, uint32_t size, xfs_daddr_t bno) { struct xfs_attr3_rmt_hdr *rmt = ptr; if (!xfs_has_crc(mp)) return 0; rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); rmt->rm_offset = cpu_to_be32(offset); rmt->rm_bytes = cpu_to_be32(size); uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); /* * Remote attribute blocks are written synchronously, so we don't * have an LSN that we can stamp in them that makes any sense to log * recovery. To ensure that log recovery handles overwrites of these * blocks sanely (i.e. once they've been freed and reallocated as some * other type of metadata) we need to ensure that the LSN has a value * that tells log recovery to ignore the LSN and overwrite the buffer * with whatever is in it's log. To do this, we use the magic * NULLCOMMITLSN to indicate that the LSN is invalid. */ rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); return sizeof(struct xfs_attr3_rmt_hdr); } /* * Helper functions to copy attribute data in and out of the one disk extents */ STATIC int xfs_attr_rmtval_copyout( struct xfs_mount *mp, struct xfs_buf *bp, struct xfs_inode *dp, xfs_ino_t owner, unsigned int *offset, unsigned int *valuelen, uint8_t **dst) { char *src = bp->b_addr; xfs_daddr_t bno = xfs_buf_daddr(bp); unsigned int len = BBTOB(bp->b_length); unsigned int blksize = mp->m_attr_geo->blksize; ASSERT(len >= blksize); while (len > 0 && *valuelen > 0) { unsigned int hdr_size = 0; unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp); byte_cnt = min(*valuelen, byte_cnt); if (xfs_has_crc(mp)) { if (xfs_attr3_rmt_hdr_ok(src, owner, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", bno, *offset, byte_cnt, owner); xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); return -EFSCORRUPTED; } hdr_size = sizeof(struct xfs_attr3_rmt_hdr); } memcpy(*dst, src + hdr_size, byte_cnt); /* roll buffer forwards */ len -= blksize; src += blksize; bno += BTOBB(blksize); /* roll attribute data forwards */ *valuelen -= byte_cnt; *dst += byte_cnt; *offset += byte_cnt; } return 0; } STATIC void xfs_attr_rmtval_copyin( struct xfs_mount *mp, struct xfs_buf *bp, xfs_ino_t ino, unsigned int *offset, unsigned int *valuelen, uint8_t **src) { char *dst = bp->b_addr; xfs_daddr_t bno = xfs_buf_daddr(bp); unsigned int len = BBTOB(bp->b_length); unsigned int blksize = mp->m_attr_geo->blksize; ASSERT(len >= blksize); while (len > 0 && *valuelen > 0) { unsigned int hdr_size; unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp); byte_cnt = min(*valuelen, byte_cnt); hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, byte_cnt, bno); memcpy(dst + hdr_size, *src, byte_cnt); /* * If this is the last block, zero the remainder of it. * Check that we are actually the last block, too. 
*/ if (byte_cnt + hdr_size < blksize) { ASSERT(*valuelen - byte_cnt == 0); ASSERT(len == blksize); memset(dst + hdr_size + byte_cnt, 0, blksize - hdr_size - byte_cnt); } /* roll buffer forwards */ len -= blksize; dst += blksize; bno += BTOBB(blksize); /* roll attribute data forwards */ *valuelen -= byte_cnt; *src += byte_cnt; *offset += byte_cnt; } } /* * Read the value associated with an attribute from the out-of-line buffer * that we stored it in. * * Returns 0 on successful retrieval, otherwise an error. */ int xfs_attr_rmtval_get( struct xfs_da_args *args) { struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; struct xfs_mount *mp = args->dp->i_mount; struct xfs_buf *bp; xfs_dablk_t lblkno = args->rmtblkno; uint8_t *dst = args->value; unsigned int valuelen; int nmap; int error; unsigned int blkcnt = args->rmtblkcnt; int i; unsigned int offset = 0; trace_xfs_attr_rmtval_get(args); ASSERT(args->valuelen != 0); ASSERT(args->rmtvaluelen == args->valuelen); valuelen = args->rmtvaluelen; while (valuelen > 0) { nmap = ATTR_RMTVALUE_MAPSIZE; error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, blkcnt, map, &nmap, XFS_BMAPI_ATTRFORK); if (error) return error; ASSERT(nmap >= 1); for (i = 0; (i < nmap) && (valuelen > 0); i++) { xfs_daddr_t dblkno; int dblkcnt; ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, &bp, &xfs_attr3_rmt_buf_ops); if (xfs_metadata_is_sick(error)) xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK); if (error) return error; error = xfs_attr_rmtval_copyout(mp, bp, args->dp, args->owner, &offset, &valuelen, &dst); xfs_buf_relse(bp); if (error) return error; /* roll attribute extent map forwards */ lblkno += map[i].br_blockcount; blkcnt -= map[i].br_blockcount; } } ASSERT(valuelen == 0); return 0; } /* * Find a "hole" in the attribute address space large enough for us to drop the * new attributes value into */ int xfs_attr_rmt_find_hole( struct xfs_da_args *args) { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; int error; unsigned int blkcnt; xfs_fileoff_t lfileoff = 0; /* * Because CRC enable attributes have headers, we can't just do a * straight byte to FSB conversion and have to take the header space * into account. */ blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, XFS_ATTR_FORK); if (error) return error; args->rmtblkno = (xfs_dablk_t)lfileoff; args->rmtblkcnt = blkcnt; return 0; } int xfs_attr_rmtval_set_value( struct xfs_da_args *args) { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_bmbt_irec map; xfs_dablk_t lblkno; uint8_t *src = args->value; unsigned int blkcnt; unsigned int valuelen; int nmap; int error; unsigned int offset = 0; /* * Roll through the "value", copying the attribute value to the * already-allocated blocks. Blocks are written synchronously * so that we can know they are all on disk before we turn off * the INCOMPLETE flag. 
*/ lblkno = args->rmtblkno; blkcnt = args->rmtblkcnt; valuelen = args->rmtvaluelen; while (valuelen > 0) { struct xfs_buf *bp; xfs_daddr_t dblkno; int dblkcnt; ASSERT(blkcnt > 0); nmap = 1; error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) return error; ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); error = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, &bp); if (error) return error; bp->b_ops = &xfs_attr3_rmt_buf_ops; xfs_attr_rmtval_copyin(mp, bp, args->owner, &offset, &valuelen, &src); error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ xfs_buf_relse(bp); if (error) return error; /* roll attribute extent map forwards */ lblkno += map.br_blockcount; blkcnt -= map.br_blockcount; } ASSERT(valuelen == 0); return 0; } /* Mark stale any incore buffers for the remote value. */ int xfs_attr_rmtval_stale( struct xfs_inode *ip, struct xfs_bmbt_irec *map, xfs_buf_flags_t incore_flags) { struct xfs_mount *mp = ip->i_mount; struct xfs_buf *bp; int error; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (XFS_IS_CORRUPT(mp, map->br_startblock == DELAYSTARTBLOCK) || XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) { xfs_bmap_mark_sick(ip, XFS_ATTR_FORK); return -EFSCORRUPTED; } error = xfs_buf_incore(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map->br_startblock), XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags, &bp); if (error) { if (error == -ENOENT) return 0; return error; } xfs_buf_stale(bp); xfs_buf_relse(bp); return 0; } /* * Find a hole for the attr and store it in the delayed attr context. This * initializes the context to roll through allocating an attr extent for a * delayed attr operation */ int xfs_attr_rmtval_find_space( struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_bmbt_irec *map = &attr->xattri_map; int error; attr->xattri_lblkno = 0; attr->xattri_blkcnt = 0; args->rmtblkcnt = 0; args->rmtblkno = 0; memset(map, 0, sizeof(struct xfs_bmbt_irec)); error = xfs_attr_rmt_find_hole(args); if (error) return error; attr->xattri_blkcnt = args->rmtblkcnt; attr->xattri_lblkno = args->rmtblkno; return 0; } /* * Write one block of the value associated with an attribute into the * out-of-line buffer that we have defined for it. This is similar to a subset * of xfs_attr_rmtval_set, but records the current block to the delayed attr * context, and leaves transaction handling to the caller. */ int xfs_attr_rmtval_set_blk( struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_inode *dp = args->dp; struct xfs_bmbt_irec *map = &attr->xattri_map; int nmap; int error; nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)attr->xattri_lblkno, attr->xattri_blkcnt, XFS_BMAPI_ATTRFORK, args->total, map, &nmap); if (error) return error; ASSERT((map->br_startblock != DELAYSTARTBLOCK) && (map->br_startblock != HOLESTARTBLOCK)); /* roll attribute extent map forwards */ attr->xattri_lblkno += map->br_blockcount; attr->xattri_blkcnt -= map->br_blockcount; return 0; } /* * Remove the value associated with an attribute by deleting the * out-of-line buffer that it is stored on. */ int xfs_attr_rmtval_invalidate( struct xfs_da_args *args) { xfs_dablk_t lblkno; unsigned int blkcnt; int error; /* * Roll through the "value", invalidating the attribute value's blocks. 
*/ lblkno = args->rmtblkno; blkcnt = args->rmtblkcnt; while (blkcnt > 0) { struct xfs_bmbt_irec map; int nmap; /* * Try to remember where we decided to put the value. */ nmap = 1; error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) return error; if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1)) { xfs_bmap_mark_sick(args->dp, XFS_ATTR_FORK); return -EFSCORRUPTED; } error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK); if (error) return error; lblkno += map.br_blockcount; blkcnt -= map.br_blockcount; } return 0; } /* * Remove the value associated with an attribute by deleting the out-of-line * buffer that it is stored on. Returns -EAGAIN for the caller to refresh the * transaction and re-call the function. Callers should keep calling this * routine until it returns something other than -EAGAIN. */ int xfs_attr_rmtval_remove( struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error, done; /* * Unmap value blocks for this attr. */ error = xfs_bunmapi(args->trans, args->dp, args->rmtblkno, args->rmtblkcnt, XFS_BMAPI_ATTRFORK, 1, &done); if (error) return error; /* * We don't need an explicit state here to pick up where we left off. We * can figure it out using the !done return code. The actual value of * attr->xattri_dela_state may be some value reminiscent of the calling * function, but its value is irrelevant within the context of this * function. Once we are done here, the next state is set as needed by * the parent. */ if (!done) { trace_xfs_attr_rmtval_remove_return(attr->xattri_dela_state, args->dp); return -EAGAIN; } args->rmtblkno = 0; args->rmtblkcnt = 0; return 0; }
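/*
 * Illustrative sketch only (not part of the original file): the retry
 * contract of xfs_attr_rmtval_remove() described above, as a hypothetical
 * caller might honour it. The real caller is the xfs_attr state machine;
 * the transaction-rolling helper used here, xfs_trans_roll_inode(), is
 * assumed to refresh args->trans while keeping the inode locked.
 */
static int example_rmtval_remove_loop(struct xfs_attr_intent *attr)
{
	struct xfs_da_args	*args = attr->xattri_da_args;
	int			error;

	do {
		error = xfs_attr_rmtval_remove(attr);
		if (error != -EAGAIN)
			break;
		/* refresh the transaction, then retry the unmap */
		error = xfs_trans_roll_inode(&args->trans, args->dp);
	} while (!error);

	return error;
}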
// SPDX-License-Identifier: GPL-2.0-or-later /* * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173 * * Copyright (C)2003 USAGI/WIDE Project * * Author Mitsuru KANDA <mk@linux-ipv6.org> */ /* * [Memo] * * Outbound: * The compression of IP datagram MUST be done before AH/ESP processing, * fragmentation, and the addition of Hop-by-Hop/Routing header. * * Inbound: * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipcomp.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/mutex.h> static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); __be32 spi; const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct ip_comp_hdr *ipcomph = (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); struct xfrm_state *t = NULL; t = xfrm_state_alloc(net); if (!t) goto out; t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; atomic_set(&t->tunnel_users, 1); out: return t; error: t->km.state = XFRM_STATE_DEAD; xfrm_state_put(t); t = NULL; goto out; } static int ipcomp6_tunnel_attach(struct xfrm_state *x) { struct net *net = xs_net(x); int err = 0; struct xfrm_state *t = NULL; __be32 spi; u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t
*)&x->props.saddr); if (spi) t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); if (!t) { err = -EINVAL; goto out; } xfrm_state_insert(t); xfrm_state_hold(t); } x->tunnel = t; atomic_inc(&t->tunnel_users); out: return err; } static int ipcomp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; default: NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) { NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; } } err = 0; out: return err; } static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ipcomp6_type = { .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, }; static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, }; static int __init ipcomp6_init(void) { if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; } return 0; } static void __exit ipcomp6_fini(void) { if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); } module_init(ipcomp6_init); module_exit(ipcomp6_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
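/*
 * Illustrative sketch only (not part of the original file): RFC 3173
 * carries a 16-bit CPI on the wire, while the xfrm state lookup in
 * ipcomp6_err() above keys on a 32-bit SPI. A hypothetical helper
 * spelling out the conversion that function performs inline:
 */
static inline __be32 example_cpi_to_spi(__be16 cpi)
{
	/* network-order 16-bit CPI -> host order -> network-order 32-bit SPI */
	return htonl((u32)ntohs(cpi));
}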
/* SPDX-License-Identifier: GPL-2.0-only */ /* L2TP internal definitions. * * Copyright (c) 2008,2009 Katalix Systems Ltd */ #include <linux/refcount.h> #ifndef _L2TP_CORE_H_ #define _L2TP_CORE_H_ #include <net/dst.h> #include <net/sock.h> #ifdef CONFIG_XFRM #include <net/xfrm.h> #endif /* Random numbers used for internal consistency checks of tunnel and session structures */ #define L2TP_SESSION_MAGIC 0x0C04EB7D struct sk_buff; struct l2tp_stats { atomic_long_t tx_packets; atomic_long_t tx_bytes; atomic_long_t tx_errors; atomic_long_t rx_packets; atomic_long_t rx_bytes; atomic_long_t rx_seq_discards; atomic_long_t rx_oos_packets; atomic_long_t rx_errors; atomic_long_t rx_cookie_discards; atomic_long_t rx_invalid; }; struct l2tp_tunnel; /* L2TP session configuration */ struct l2tp_session_cfg { enum l2tp_pwtype pw_type; unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ unsigned int send_seq:1; /* send packets with sequence numbers? */ unsigned int lns_mode:1; /* behave as LNS? * LAC enables sequence numbers under LNS control. */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ int cookie_len; /* 0, 4 or 8 bytes */ u8 peer_cookie[8]; /* peer's cookie */ int peer_cookie_len; /* 0, 4 or 8 bytes */ int reorder_timeout; /* configured reorder timeout (in jiffies) */ char *ifname; }; struct l2tp_session_coll_list { spinlock_t lock; /* for access to list */ struct list_head list; refcount_t ref_count; }; /* Represents a session (pseudowire) instance. * Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics. * Is linked into a per-tunnel session list and a per-net ("global") IDR tree. */ #define L2TP_SESSION_NAME_MAX 32 struct l2tp_session { int magic; /* should be L2TP_SESSION_MAGIC */ long dead; struct rcu_head rcu; struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */ u32 session_id; u32 peer_session_id; u8 cookie[8]; int cookie_len; u8 peer_cookie[8]; int peer_cookie_len; u16 l2specific_type; u16 hdr_len; u32 nr; /* session NR state (receive) */ u32 ns; /* session NR state (send) */ struct sk_buff_head reorder_q; /* receive reorder queue */ u32 nr_max; /* max NR.
Depends on tunnel */ u32 nr_window_size; /* NR window size */ u32 nr_oos; /* NR of last OOS packet */ int nr_oos_count; /* for OOS recovery */ int nr_oos_count_max; struct list_head list; /* per-tunnel list node */ refcount_t ref_count; struct hlist_node hlist; /* per-net session hlist */ unsigned long hlist_key; /* key for session hlist */ struct l2tp_session_coll_list *coll_list; /* session collision list */ struct list_head clist; /* for coll_list */ char name[L2TP_SESSION_NAME_MAX]; /* for logging */ char ifname[IFNAMSIZ]; unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ unsigned int send_seq:1; /* send packets with sequence numbers? */ unsigned int lns_mode:1; /* behave as LNS? * LAC enables sequence numbers under LNS control. */ int reorder_timeout; /* configured reorder timeout (in jiffies) */ int reorder_skip; /* set if skip to next nr */ enum l2tp_pwtype pwtype; struct l2tp_stats stats; struct work_struct del_work; /* Session receive handler for data packets. * Each pseudowire implementation should implement this callback in order to * handle incoming packets. Packets are passed to the pseudowire handler after * reordering, if data sequence numbers are enabled for the session. */ void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); /* Session close handler. * Each pseudowire implementation may implement this callback in order to carry * out pseudowire-specific shutdown actions. * The callback is called by core after unlisting the session and purging its * reorder queue. */ void (*session_close)(struct l2tp_session *session); /* Session show handler. * Pseudowire-specific implementation of debugfs session rendering. * The callback is called by l2tp_debugfs.c after rendering core session * information. */ void (*show)(struct seq_file *m, void *priv); u8 priv[]; /* private data */ }; /* L2TP tunnel configuration */ struct l2tp_tunnel_cfg { enum l2tp_encap_type encap; /* Used only for kernel-created sockets */ struct in_addr local_ip; struct in_addr peer_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr *local_ip6; struct in6_addr *peer_ip6; #endif u16 local_udp_port; u16 peer_udp_port; unsigned int use_udp_checksums:1, udp6_zero_tx_checksums:1, udp6_zero_rx_checksums:1; }; /* Represents a tunnel instance. * Tracks runtime state including IO statistics. * Holds the tunnel socket (either passed from userspace or directly created by the kernel). * Maintains a list of sessions belonging to the tunnel instance. * Is linked into a per-net list of tunnels. */ #define L2TP_TUNNEL_NAME_MAX 20 struct l2tp_tunnel { unsigned long dead; struct rcu_head rcu; spinlock_t list_lock; /* write-protection for session_list */ bool acpt_newsess; /* indicates whether this tunnel accepts * new sessions. Protected by list_lock. */ struct list_head session_list; /* list of sessions */ u32 tunnel_id; u32 peer_tunnel_id; int version; /* 2=>L2TPv2, 3=>L2TPv3 */ char name[L2TP_TUNNEL_NAME_MAX]; /* for logging */ enum l2tp_encap_type encap; struct l2tp_stats stats; struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; struct sock *sock; /* parent socket */ int fd; /* parent fd, if tunnel socket was created * by userspace */ struct work_struct del_work; }; /* Pseudowire ops callbacks for use with the l2tp genetlink interface */ struct l2tp_nl_cmd_ops { /* The pseudowire session create callback is responsible for creating a session * instance for a specific pseudowire type. 
* It must call l2tp_session_create and l2tp_session_register to register the * session instance, as well as carry out any pseudowire-specific initialisation. * It must return >= 0 on success, or an appropriate negative errno value on failure. */ int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); /* The pseudowire session delete callback is responsible for initiating the deletion * of a session instance. * It must call l2tp_session_delete, as well as carry out any pseudowire-specific * teardown actions. */ void (*session_delete)(struct l2tp_session *session); }; static inline void *l2tp_session_priv(struct l2tp_session *session) { return &session->priv[0]; } /* Tunnel and session refcounts */ void l2tp_tunnel_put(struct l2tp_tunnel *tunnel); void l2tp_session_put(struct l2tp_session *session); /* Tunnel and session lookup. * These functions take a reference on the instances they return, so * the caller must ensure that the reference is dropped appropriately. */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key); struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id); struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id); struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver, u32 tunnel_id, u32 session_id); struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver, u32 tunnel_id, unsigned long *key); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); /* Tunnel and session lifetime management. * Creation of a new instance is a two-step process: create, then register. * Destruction is triggered using the *_delete functions, and completes asynchronously. */ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, struct l2tp_tunnel_cfg *cfg); void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel); void l2tp_session_delete(struct l2tp_session *session); /* Receive path helpers. If data sequencing is enabled for the session these * functions handle queuing and reordering prior to passing packets to the * pseudowire code to be passed to userspace. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); /* Transmit path helpers for sending packets over the tunnel socket. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version, enum l2tp_encap_type encap); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb); /* Pseudowire management. * Pseudowires should register with l2tp core on module init, and unregister * on module exit. */ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); /* IOCTL helper for IP encap modules. 
*/ int l2tp_ioctl(struct sock *sk, int cmd, int *karg); struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk); static inline int l2tp_get_l2specific_len(struct l2tp_session *session) { switch (session->l2specific_type) { case L2TP_L2SPECTYPE_DEFAULT: return 4; case L2TP_L2SPECTYPE_NONE: default: return 0; } } static inline u32 l2tp_tunnel_dst_mtu(const struct l2tp_tunnel *tunnel) { struct dst_entry *dst; u32 mtu; dst = sk_dst_get(tunnel->sock); if (!dst) return 0; mtu = dst_mtu(dst); dst_release(dst); return mtu; } #ifdef CONFIG_XFRM static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) { struct sock *sk = tunnel->sock; return sk && (rcu_access_pointer(sk->sk_policy[0]) || rcu_access_pointer(sk->sk_policy[1])); } #else static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) { return false; } #endif static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, unsigned char **ptr, unsigned char **optr) { int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); if (opt_len > 0) { int off = *ptr - *optr; if (!pskb_may_pull(skb, off + opt_len)) return -1; if (skb->data != *optr) { *optr = skb->data; *ptr = skb->data + off; } } return 0; } #define MODULE_ALIAS_L2TP_PWTYPE(type) \ MODULE_ALIAS("net-l2tp-type-" __stringify(type)) #endif /* _L2TP_CORE_H_ */
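/*
 * Illustrative sketch only (not part of the original file): the two-step
 * create-then-register tunnel lifetime documented above, as a hypothetical
 * caller might use it. The version and id values are placeholders, cfg is
 * assumed to be filled in by the caller, and the failure path here simply
 * drops the creation reference; real callers may clean up differently.
 */
static inline int example_tunnel_bringup(struct net *net, int fd,
					 struct l2tp_tunnel_cfg *cfg)
{
	struct l2tp_tunnel *tunnel;
	int err;

	/* step 1: create the instance (not yet visible to lookups) */
	err = l2tp_tunnel_create(fd, 3, 1, 2, cfg, &tunnel);
	if (err)
		return err;

	/* step 2: register it with the per-net tunnel list */
	err = l2tp_tunnel_register(tunnel, net, cfg);
	if (err)
		l2tp_tunnel_put(tunnel);

	return err;
}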
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Checksumming functions for IP, TCP, UDP and so on * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Borrows very liberally from tcp.c and ip.c, see those * files for more names. */ #ifndef _CHECKSUM_H #define _CHECKSUM_H #include <linux/errno.h> #include <asm/types.h> #include <asm/byteorder.h> #include <asm/checksum.h> #if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER) #include <linux/uaccess.h> #endif #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { if (copy_from_user(dst, src, len)) return 0; return csum_partial(dst, len, ~0U); } #endif #ifndef HAVE_CSUM_COPY_USER static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); if (copy_to_user(dst, src, len) == 0) return sum; return 0; } #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY static __always_inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { memcpy(dst, src, len); return csum_partial(dst, len, 0); } #endif #ifndef HAVE_ARCH_CSUM_ADD static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; return (__force __wsum)(res + (res < (__force u32)addend)); } #endif static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; res += (__force u16)addend; return (__force __sum16)(res + (res < (__force u16)addend)); } static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } #ifndef HAVE_ARCH_CSUM_SHIFT static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ if (offset & 1) return (__force __wsum)ror32((__force u32)sum, 8); return sum; } #endif static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { return csum_add(csum, csum_shift(csum2, offset)); } static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } static __always_inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
{ return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); *sum = csum_fold(csum_add(tmp, (__force __wsum)to)); } /* Implements RFC 1624 (Incremental Internet Checksum) * 3. Discussion states : * HC' = ~(~HC + ~m + m') * m : old value of a 16bit field * m' : new value of a 16bit field */ static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) { *csum = csum_add(csum_sub(*csum, old), new); } static inline unsigned short csum_from32to16(unsigned int sum) { sum += (sum >> 16) | (sum << 16); return (unsigned short)(sum >> 16); } struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); static __always_inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; /* Subtract out checksum up to start */ csum = csum_sub(csum, csum_partial(ptr, start, 0)); /* Set derived checksum in packet */ delta = csum_sub((__force __wsum)csum_fold(csum), (__force __wsum)*psum); *psum = csum_fold(csum); return delta; } static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } static __always_inline __wsum wsum_negate(__wsum val) { return (__force __wsum)-((__force u32)val); } #endif
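/*
 * Illustrative sketch only (not part of the original file): applying the
 * RFC 1624 update rule implemented by csum_replace2() above to a
 * hypothetical big-endian 16-bit header field. *check is the checksum
 * covering the field; the rule is HC' = ~(~HC + ~m + m').
 */
static __always_inline void example_update_field(__sum16 *check,
						 __be16 *field, __be16 newval)
{
	csum_replace2(check, *field, newval);	/* incrementally fix checksum */
	*field = newval;			/* then store the new value */
}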
// SPDX-License-Identifier: GPL-2.0-only /* * Backlight Lowlevel Control Abstraction * * Copyright (C) 2003,2004 Hewlett-Packard Company * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> #include <linux/backlight.h> #include <linux/notifier.h> #include <linux/ctype.h> #include <linux/err.h> #include <linux/fb.h> #include <linux/slab.h> #ifdef CONFIG_PMAC_BACKLIGHT #include <asm/backlight.h> #endif /** * DOC: overview * * The backlight core supports implementing backlight drivers. * * A backlight driver registers a driver using * devm_backlight_device_register().
The properties of the backlight * driver such as type and max_brightness must be specified. * When the core detects changes in, for example, brightness or power state, * the update_status() operation is called. The backlight driver shall * implement this operation and use it to adjust the backlight. * * Several sysfs attributes are provided by the backlight core:: * * - brightness R/W, set the requested brightness level * - actual_brightness RO, the brightness level used by the HW * - max_brightness RO, the maximum brightness level supported * * See Documentation/ABI/stable/sysfs-class-backlight for the full list. * * The backlight can be adjusted using the sysfs interface, and * the backlight driver may also support adjusting backlight using * a hot-key or some other platform or firmware specific way. * * The driver must implement the get_brightness() operation if * the HW does not support all the levels that can be specified in * brightness, thus providing user-space access to the actual level * via the actual_brightness attribute. * * When the backlight changes, this is reported to user-space using * an uevent connected to the actual_brightness attribute. * When brightness is set by platform specific means, for example * a hot-key to adjust backlight, the driver must notify the backlight * core that brightness has changed using backlight_force_update(). * * The backlight driver core receives notifications from fbdev; if * the event is FB_EVENT_BLANK and the value of blank, from the * FBIOBLANK ioctl, results in a change in the backlight state, the * update_status() operation is called. */ static struct list_head backlight_dev_list; static struct mutex backlight_dev_list_mutex; static const char *const backlight_types[] = { [BACKLIGHT_RAW] = "raw", [BACKLIGHT_PLATFORM] = "platform", [BACKLIGHT_FIRMWARE] = "firmware", }; static const char *const backlight_scale_types[] = { [BACKLIGHT_SCALE_UNKNOWN] = "unknown", [BACKLIGHT_SCALE_LINEAR] = "linear", [BACKLIGHT_SCALE_NON_LINEAR] = "non-linear", }; #if defined(CONFIG_FB_CORE) || (defined(CONFIG_FB_CORE_MODULE) && \ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)) /* * fb_notifier_callback * * This callback gets called when something important happens inside a * framebuffer driver. The backlight core only cares about FB_BLANK_UNBLANK * which is reported to the driver using backlight_update_status() * as a state change. * * There may be several fbdevs connected to the backlight device, * in which case they are kept track of. A state change is only reported * if there is a change in backlight for the specified fbdev. */ static int fb_notifier_callback(struct notifier_block *self, unsigned long event, void *data) { struct backlight_device *bd; struct fb_event *evdata = data; struct fb_info *info = evdata->info; struct backlight_device *fb_bd = fb_bl_device(info); int node = info->node; int fb_blank = 0; /* If we aren't interested in this event, skip it immediately ...
*/ if (event != FB_EVENT_BLANK) return 0; bd = container_of(self, struct backlight_device, fb_notif); mutex_lock(&bd->ops_lock); if (!bd->ops) goto out; if (bd->ops->controls_device && !bd->ops->controls_device(bd, info->device)) goto out; if (fb_bd && fb_bd != bd) goto out; fb_blank = *(int *)evdata->data; if (fb_blank == FB_BLANK_UNBLANK && !bd->fb_bl_on[node]) { bd->fb_bl_on[node] = true; if (!bd->use_count++) { bd->props.state &= ~BL_CORE_FBBLANK; backlight_update_status(bd); } } else if (fb_blank != FB_BLANK_UNBLANK && bd->fb_bl_on[node]) { bd->fb_bl_on[node] = false; if (!(--bd->use_count)) { bd->props.state |= BL_CORE_FBBLANK; backlight_update_status(bd); } } out: mutex_unlock(&bd->ops_lock); return 0; } static int backlight_register_fb(struct backlight_device *bd) { memset(&bd->fb_notif, 0, sizeof(bd->fb_notif)); bd->fb_notif.notifier_call = fb_notifier_callback; return fb_register_client(&bd->fb_notif); } static void backlight_unregister_fb(struct backlight_device *bd) { fb_unregister_client(&bd->fb_notif); } #else static inline int backlight_register_fb(struct backlight_device *bd) { return 0; } static inline void backlight_unregister_fb(struct backlight_device *bd) { } #endif /* CONFIG_FB_CORE */ static void backlight_generate_event(struct backlight_device *bd, enum backlight_update_reason reason) { char *envp[2]; switch (reason) { case BACKLIGHT_UPDATE_SYSFS: envp[0] = "SOURCE=sysfs"; break; case BACKLIGHT_UPDATE_HOTKEY: envp[0] = "SOURCE=hotkey"; break; default: envp[0] = "SOURCE=unknown"; break; } envp[1] = NULL; kobject_uevent_env(&bd->dev.kobj, KOBJ_CHANGE, envp); sysfs_notify(&bd->dev.kobj, NULL, "actual_brightness"); } static ssize_t bl_power_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.power); } static ssize_t bl_power_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct backlight_device *bd = to_backlight_device(dev); unsigned long power, old_power; rc = kstrtoul(buf, 0, &power); if (rc) return rc; rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { pr_debug("set power to %lu\n", power); if (bd->props.power != power) { old_power = bd->props.power; bd->props.power = power; rc = backlight_update_status(bd); if (rc) bd->props.power = old_power; else rc = count; } else { rc = count; } } mutex_unlock(&bd->ops_lock); return rc; } static DEVICE_ATTR_RW(bl_power); static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.brightness); } int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness) { int rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { if (brightness > bd->props.max_brightness) rc = -EINVAL; else { pr_debug("set brightness to %lu\n", brightness); bd->props.brightness = brightness; rc = backlight_update_status(bd); } } mutex_unlock(&bd->ops_lock); backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS); return rc; } EXPORT_SYMBOL(backlight_device_set_brightness); static ssize_t brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct backlight_device *bd = to_backlight_device(dev); unsigned long brightness; rc = kstrtoul(buf, 0, &brightness); if (rc) return rc; rc = backlight_device_set_brightness(bd, brightness); return rc ? 
rc : count; } static DEVICE_ATTR_RW(brightness); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%s\n", backlight_types[bd->props.type]); } static DEVICE_ATTR_RO(type); static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.max_brightness); } static DEVICE_ATTR_RO(max_brightness); static ssize_t actual_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { int rc = -ENXIO; struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) { rc = bd->ops->get_brightness(bd); if (rc >= 0) rc = sprintf(buf, "%d\n", rc); } else { rc = sprintf(buf, "%d\n", bd->props.brightness); } mutex_unlock(&bd->ops_lock); return rc; } static DEVICE_ATTR_RO(actual_brightness); static ssize_t scale_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); if (WARN_ON(bd->props.scale > BACKLIGHT_SCALE_NON_LINEAR)) return sprintf(buf, "unknown\n"); return sprintf(buf, "%s\n", backlight_scale_types[bd->props.scale]); } static DEVICE_ATTR_RO(scale); #ifdef CONFIG_PM_SLEEP static int backlight_suspend(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) { bd->props.state |= BL_CORE_SUSPENDED; backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); return 0; } static int backlight_resume(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) { bd->props.state &= ~BL_CORE_SUSPENDED; backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); return 0; } #endif static SIMPLE_DEV_PM_OPS(backlight_class_dev_pm_ops, backlight_suspend, backlight_resume); static void bl_device_release(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); kfree(bd); } static struct attribute *bl_device_attrs[] = { &dev_attr_bl_power.attr, &dev_attr_brightness.attr, &dev_attr_actual_brightness.attr, &dev_attr_max_brightness.attr, &dev_attr_scale.attr, &dev_attr_type.attr, NULL, }; ATTRIBUTE_GROUPS(bl_device); static const struct class backlight_class = { .name = "backlight", .dev_groups = bl_device_groups, .pm = &backlight_class_dev_pm_ops, }; /** * backlight_force_update - tell the backlight subsystem that hardware state * has changed * @bd: the backlight device to update * @reason: reason for update * * Updates the internal state of the backlight in response to a hardware event, * and generates an uevent to notify userspace. A backlight driver shall call * backlight_force_update() when the backlight is changed using, for example, * a hot-key. The updated brightness is read using get_brightness() and the * brightness value is reported using an uevent. 
*/ void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason) { int brightness; mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) { brightness = bd->ops->get_brightness(bd); if (brightness >= 0) bd->props.brightness = brightness; else dev_err(&bd->dev, "Could not update brightness from device: %pe\n", ERR_PTR(brightness)); } mutex_unlock(&bd->ops_lock); backlight_generate_event(bd, reason); } EXPORT_SYMBOL(backlight_force_update); /* deprecated - use devm_backlight_device_register() */ struct backlight_device *backlight_device_register(const char *name, struct device *parent, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props) { struct backlight_device *new_bd; int rc; pr_debug("backlight_device_register: name=%s\n", name); new_bd = kzalloc(sizeof(struct backlight_device), GFP_KERNEL); if (!new_bd) return ERR_PTR(-ENOMEM); mutex_init(&new_bd->update_lock); mutex_init(&new_bd->ops_lock); new_bd->dev.class = &backlight_class; new_bd->dev.parent = parent; new_bd->dev.release = bl_device_release; dev_set_name(&new_bd->dev, "%s", name); dev_set_drvdata(&new_bd->dev, devdata); /* Set default properties */ if (props) { memcpy(&new_bd->props, props, sizeof(struct backlight_properties)); if (props->type <= 0 || props->type >= BACKLIGHT_TYPE_MAX) { WARN(1, "%s: invalid backlight type", name); new_bd->props.type = BACKLIGHT_RAW; } } else { new_bd->props.type = BACKLIGHT_RAW; } rc = device_register(&new_bd->dev); if (rc) { put_device(&new_bd->dev); return ERR_PTR(rc); } rc = backlight_register_fb(new_bd); if (rc) { device_unregister(&new_bd->dev); return ERR_PTR(rc); } new_bd->ops = ops; #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (!pmac_backlight) pmac_backlight = new_bd; mutex_unlock(&pmac_backlight_mutex); #endif mutex_lock(&backlight_dev_list_mutex); list_add(&new_bd->entry, &backlight_dev_list); mutex_unlock(&backlight_dev_list_mutex); return new_bd; } EXPORT_SYMBOL(backlight_device_register); /** backlight_device_get_by_type - find first backlight device of a type * @type: the type of backlight device * * Look up the first backlight device of the specified type * * RETURNS: * * Pointer to backlight device if any was found. Otherwise NULL. */ struct backlight_device *backlight_device_get_by_type(enum backlight_type type) { bool found = false; struct backlight_device *bd; mutex_lock(&backlight_dev_list_mutex); list_for_each_entry(bd, &backlight_dev_list, entry) { if (bd->props.type == type) { found = true; break; } } mutex_unlock(&backlight_dev_list_mutex); return found ? bd : NULL; } EXPORT_SYMBOL(backlight_device_get_by_type); /** * backlight_device_get_by_name - Get backlight device by name * @name: Device name * * This function looks up a backlight device by its name. It obtains a reference * on the backlight device and it is the caller's responsibility to drop the * reference by calling put_device(). * * Returns: * A pointer to the backlight device if found, otherwise NULL. */ struct backlight_device *backlight_device_get_by_name(const char *name) { struct device *dev; dev = class_find_device_by_name(&backlight_class, name); return dev ? 
to_backlight_device(dev) : NULL; } EXPORT_SYMBOL(backlight_device_get_by_name); /* deprecated - use devm_backlight_device_unregister() */ void backlight_device_unregister(struct backlight_device *bd) { if (!bd) return; mutex_lock(&backlight_dev_list_mutex); list_del(&bd->entry); mutex_unlock(&backlight_dev_list_mutex); #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (pmac_backlight == bd) pmac_backlight = NULL; mutex_unlock(&pmac_backlight_mutex); #endif mutex_lock(&bd->ops_lock); bd->ops = NULL; mutex_unlock(&bd->ops_lock); backlight_unregister_fb(bd); device_unregister(&bd->dev); } EXPORT_SYMBOL(backlight_device_unregister); static void devm_backlight_device_release(struct device *dev, void *res) { struct backlight_device *backlight = *(struct backlight_device **)res; backlight_device_unregister(backlight); } static int devm_backlight_device_match(struct device *dev, void *res, void *data) { struct backlight_device **r = res; return *r == data; } /** * devm_backlight_device_register - register a new backlight device * @dev: the device to register * @name: the name of the device * @parent: a pointer to the parent device (often the same as @dev) * @devdata: an optional pointer to be stored for private driver use * @ops: the backlight operations structure * @props: the backlight properties * * Creates and registers new backlight device. When a backlight device * is registered the configuration must be specified in the @props * parameter. See description of &backlight_properties. * * RETURNS: * * struct backlight on success, or an ERR_PTR on error */ struct backlight_device *devm_backlight_device_register(struct device *dev, const char *name, struct device *parent, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props) { struct backlight_device **ptr, *backlight; ptr = devres_alloc(devm_backlight_device_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); backlight = backlight_device_register(name, parent, devdata, ops, props); if (!IS_ERR(backlight)) { *ptr = backlight; devres_add(dev, ptr); } else { devres_free(ptr); } return backlight; } EXPORT_SYMBOL(devm_backlight_device_register); /** * devm_backlight_device_unregister - unregister backlight device * @dev: the device to unregister * @bd: the backlight device to unregister * * Deallocates a backlight allocated with devm_backlight_device_register(). * Normally this function will not need to be called and the resource management * code will ensure that the resources are freed. */ void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd) { int rc; rc = devres_release(dev, devm_backlight_device_release, devm_backlight_device_match, bd); WARN_ON(rc); } EXPORT_SYMBOL(devm_backlight_device_unregister); #ifdef CONFIG_OF static int of_parent_match(struct device *dev, const void *data) { return dev->parent && dev->parent->of_node == data; } /** * of_find_backlight_by_node() - find backlight device by device-tree node * @node: device-tree node of the backlight device * * Returns a pointer to the backlight device corresponding to the given DT * node or NULL if no such backlight device exists or if the device hasn't * been probed yet. * * This function obtains a reference on the backlight device and it is the * caller's responsibility to drop the reference by calling put_device() on * the backlight device's .dev field. 
*/ struct backlight_device *of_find_backlight_by_node(struct device_node *node) { struct device *dev; dev = class_find_device(&backlight_class, NULL, node, of_parent_match); return dev ? to_backlight_device(dev) : NULL; } EXPORT_SYMBOL(of_find_backlight_by_node); #endif static struct backlight_device *of_find_backlight(struct device *dev) { struct backlight_device *bd = NULL; struct device_node *np; if (!dev) return NULL; if (IS_ENABLED(CONFIG_OF) && dev->of_node) { np = of_parse_phandle(dev->of_node, "backlight", 0); if (np) { bd = of_find_backlight_by_node(np); of_node_put(np); if (!bd) return ERR_PTR(-EPROBE_DEFER); } } return bd; } static void devm_backlight_release(void *data) { struct backlight_device *bd = data; put_device(&bd->dev); } /** * devm_of_find_backlight - find backlight for a device * @dev: the device * * This function looks for a property named 'backlight' on the DT node * connected to @dev and looks up the backlight device. The lookup is * device managed so the reference to the backlight device is automatically * dropped on driver detach. * * RETURNS: * * A pointer to the backlight device if found. * Error pointer -EPROBE_DEFER if the DT property is set, but no backlight * device is found. NULL if there's no backlight property. */ struct backlight_device *devm_of_find_backlight(struct device *dev) { struct backlight_device *bd; int ret; bd = of_find_backlight(dev); if (IS_ERR_OR_NULL(bd)) return bd; ret = devm_add_action_or_reset(dev, devm_backlight_release, bd); if (ret) return ERR_PTR(ret); return bd; } EXPORT_SYMBOL(devm_of_find_backlight); static void __exit backlight_class_exit(void) { class_unregister(&backlight_class); } static int __init backlight_class_init(void) { int ret; ret = class_register(&backlight_class); if (ret) { pr_warn("Unable to create backlight class; errno = %d\n", ret); return ret; } INIT_LIST_HEAD(&backlight_dev_list); mutex_init(&backlight_dev_list_mutex); return 0; } /* * if this is compiled into the kernel, we need to ensure that the * class is registered before users of the class try to register lcd's */ postcore_initcall(backlight_class_init); module_exit(backlight_class_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jamey Hicks <jamey.hicks@hp.com>, Andrew Zabolotny <zap@homelink.ru>"); MODULE_DESCRIPTION("Backlight Lowlevel Control Abstraction");
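/*
 * Illustrative sketch only (not part of the original file): a minimal
 * driver-side use of the registration flow described in the DOC comment
 * above. example_hw_set() is a stand-in for the real hardware write; the
 * type and max_brightness properties must be filled in as the overview
 * requires.
 */
static int example_hw_set(struct backlight_device *bd, int level)
{
	return 0;	/* hypothetical bus write goes here */
}

static int example_update_status(struct backlight_device *bd)
{
	/* backlight_get_brightness() honours power state and blanking */
	return example_hw_set(bd, backlight_get_brightness(bd));
}

static const struct backlight_ops example_ops = {
	.update_status	= example_update_status,
};

static int example_probe(struct device *dev)
{
	struct backlight_properties props = {
		.type		= BACKLIGHT_RAW,	/* mandatory */
		.max_brightness	= 255,			/* mandatory */
	};
	struct backlight_device *bd;

	bd = devm_backlight_device_register(dev, "example", dev, NULL,
					    &example_ops, &props);
	return PTR_ERR_OR_ZERO(bd);
}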
// SPDX-License-Identifier: GPL-2.0 /* * USB-ACPI glue code * * Copyright 2012 Red Hat <mjg@redhat.com> */ #include <linux/module.h> #include <linux/usb.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/pci.h> #include <linux/usb/hcd.h> #include "hub.h" /** * usb_acpi_power_manageable - check whether usb port has * acpi power resource. * @hdev: USB device belonging to the usb hub * @index: zero-based port index * * Return true if the port has an acpi power resource and false if not. */ bool usb_acpi_power_manageable(struct usb_device *hdev, int index) { acpi_handle port_handle; int port1 = index + 1; port_handle = usb_get_hub_port_acpi_handle(hdev, port1); if (port_handle) return acpi_bus_power_manageable(port_handle); else return false; } EXPORT_SYMBOL_GPL(usb_acpi_power_manageable); #define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899" #define USB_DSM_DISABLE_U1_U2_FOR_PORT 5 /** * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port. * @hdev: USB device belonging to the usb hub * @index: zero-based port index * * Some USB3 ports may not support USB3 link power management U1/U2 states * due to different retimer setup. ACPI provides a _DSM method which returns 0x01 * if U1 and U2 states should be disabled.
Evaluate _DSM with: * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899 * Arg1: Revision ID = 0 * Arg2: Function Index = 5 * Arg3: (empty) * * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0 */ int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) { union acpi_object *obj; acpi_handle port_handle; int port1 = index + 1; guid_t guid; int ret; ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid); if (ret) return ret; port_handle = usb_get_hub_port_acpi_handle(hdev, port1); if (!port_handle) { dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1); return -ENODEV; } if (!acpi_check_dsm(port_handle, &guid, 0, BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) { dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n", port1, USB_DSM_DISABLE_U1_U2_FOR_PORT); return -ENODEV; } obj = acpi_evaluate_dsm_typed(port_handle, &guid, 0, USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL, ACPI_TYPE_INTEGER); if (!obj) { dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1); return -EINVAL; } if (obj->integer.value == 0x01) ret = 1; ACPI_FREE(obj); return ret; } EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable); /** * usb_acpi_set_power_state - control usb port's power via acpi power * resource * @hdev: USB device belonging to the usb hub * @index: zero-based port index * @enable: power state expected to be set * * Note: use usb_acpi_power_manageable() to check whether the usb port * has an acpi power resource before invoking this function. * * Returns 0 on success, else negative errno. */ int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_port *port_dev; acpi_handle port_handle; unsigned char state; int port1 = index + 1; int error = -EINVAL; if (!hub) return -ENODEV; port_dev = hub->ports[port1 - 1]; port_handle = (acpi_handle) usb_get_hub_port_acpi_handle(hdev, port1); if (!port_handle) return error; if (enable) state = ACPI_STATE_D0; else state = ACPI_STATE_D3_COLD; error = acpi_bus_set_power(port_handle, state); if (!error) dev_dbg(&port_dev->dev, "acpi: power was set to %d\n", enable); else dev_dbg(&port_dev->dev, "acpi: power failed to be set\n"); return error; } EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); /** * usb_acpi_add_usb4_devlink - add device link to USB4 Host Interface for tunneled USB3 devices * * @udev: Tunneled USB3 device connected to a roothub. * * Adds a device link between a tunneled USB3 device and the USB4 Host Interface * device to ensure correct runtime PM suspend and resume order. This function * should only be called for tunneled USB3 devices. * The USB4 Host Interface this tunneled device depends on is found from the roothub * port ACPI device specific data _DSD entry.
* * Return: negative error code on failure, 0 otherwise */ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) { const struct device_link *link; struct usb_port *port_dev; struct usb_hub *hub; if (!udev->parent || udev->parent->parent) return 0; hub = usb_hub_to_struct_hub(udev->parent); port_dev = hub->ports[udev->portnum - 1]; struct fwnode_handle *nhi_fwnode __free(fwnode_handle) = fwnode_find_reference(dev_fwnode(&port_dev->dev), "usb4-host-interface", 0); if (IS_ERR(nhi_fwnode) || !nhi_fwnode->dev) return 0; link = device_link_add(&port_dev->child->dev, nhi_fwnode->dev, DL_FLAG_STATELESS | DL_FLAG_RPM_ACTIVE | DL_FLAG_PM_RUNTIME); if (!link) { dev_err(&port_dev->dev, "Failed to create device link from %s to %s\n", dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev)); return -EINVAL; } dev_dbg(&port_dev->dev, "Created device link from %s to %s\n", dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev)); return 0; } /* * Private to usb-acpi, all the core needs to know is that * port_dev->location is non-zero when it has been set by the firmware. */ #define USB_ACPI_LOCATION_VALID (1 << 31) static void usb_acpi_get_connect_type(struct usb_port *port_dev, acpi_handle *handle) { enum usb_port_connect_type connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *upc = NULL; struct acpi_pld_info *pld = NULL; acpi_status status; /* * According to 9.14 in ACPI Spec 6.2, _PLD indicates whether a usb port * is user visible and _UPC indicates whether it is connectable. If * the port is both visible and connectable, USB devices can be freely * connected and disconnected. If it is not visible but connectable, * a usb device is directly hard-wired to the port. If it is neither * visible nor connectable, the port is not used. */ if (acpi_get_physical_device_location(handle, &pld) && pld) port_dev->location = USB_ACPI_LOCATION_VALID | pld->group_token << 8 | pld->group_position; status = acpi_evaluate_object(handle, "_UPC", NULL, &buffer); if (ACPI_FAILURE(status)) goto out; upc = buffer.pointer; if (!upc || (upc->type != ACPI_TYPE_PACKAGE) || upc->package.count != 4) goto out; /* UPC states port is connectable */ if (upc->package.elements[0].integer.value) if (!pld) ; /* keep connect_type as unknown */ else if (pld->user_visible) connect_type = USB_PORT_CONNECT_TYPE_HOT_PLUG; else connect_type = USB_PORT_CONNECT_TYPE_HARD_WIRED; else connect_type = USB_PORT_NOT_USED; out: port_dev->connect_type = connect_type; kfree(upc); ACPI_FREE(pld); } static struct acpi_device * usb_acpi_get_companion_for_port(struct usb_port *port_dev) { struct usb_device *udev; struct acpi_device *adev; acpi_handle *parent_handle; int port1; /* Get the struct usb_device pointer of the port's hub */ udev = to_usb_device(port_dev->dev.parent->parent); /* * The root hub ports' parent is the root hub. The non-root-hub * ports' parent is the parent hub port which the hub is * connected to.
*/ if (!udev->parent) { adev = ACPI_COMPANION(&udev->dev); port1 = usb_hcd_find_raw_port_number(bus_to_hcd(udev->bus), port_dev->portnum); } else { parent_handle = usb_get_hub_port_acpi_handle(udev->parent, udev->portnum); if (!parent_handle) return NULL; adev = acpi_fetch_acpi_dev(parent_handle); port1 = port_dev->portnum; } return acpi_find_child_by_adr(adev, port1); } static struct acpi_device * usb_acpi_find_companion_for_port(struct usb_port *port_dev) { struct acpi_device *adev; adev = usb_acpi_get_companion_for_port(port_dev); if (!adev) return NULL; usb_acpi_get_connect_type(port_dev, adev->handle); return adev; } static struct acpi_device * usb_acpi_find_companion_for_device(struct usb_device *udev) { struct acpi_device *adev; struct usb_port *port_dev; struct usb_hub *hub; if (!udev->parent) { /* * The root hub is the only child (_ADR=0) under its parent, the HC. * The sysdev pointer is the HC as seen from firmware. */ adev = ACPI_COMPANION(udev->bus->sysdev); return acpi_find_child_device(adev, 0, false); } hub = usb_hub_to_struct_hub(udev->parent); if (!hub) return NULL; /* Tunneled USB3 devices depend on USB4 Host Interface, set device link to it */ if (udev->speed >= USB_SPEED_SUPER && udev->tunnel_mode != USB_LINK_NATIVE) usb_acpi_add_usb4_devlink(udev); /* * This is an embedded USB device connected to a port and such * devices share port's ACPI companion. */ port_dev = hub->ports[udev->portnum - 1]; return usb_acpi_get_companion_for_port(port_dev); } static struct acpi_device *usb_acpi_find_companion(struct device *dev) { /* * The USB hierarchy looks like the following: * * Device (EHC1) * Device (HUBN) * Device (PR01) * Device (PR11) * Device (PR12) * Device (FN12) * Device (FN13) * Device (PR13) * ... * where HUBN is the root hub, and PRNN are USB ports and devices * connected to them, and FNNN are individual functions for * connected composite USB devices. PRNN and FNNN may contain * _CRS and other methods describing sideband resources for * the connected device. * * On the kernel side both root hub and embedded USB devices are * represented as instances of usb_device structure, and ports * are represented as usb_port structures, so the whole process * is split into 2 parts: finding companions for devices and * finding companions for ports. * * Note that we do not handle individual functions of composite * devices yet, for that we would need to assign companions to * devices corresponding to USB interfaces. */ if (is_usb_device(dev)) return usb_acpi_find_companion_for_device(to_usb_device(dev)); else if (is_usb_port(dev)) return usb_acpi_find_companion_for_port(to_usb_port(dev)); return NULL; } static bool usb_acpi_bus_match(struct device *dev) { return is_usb_device(dev) || is_usb_port(dev); } static struct acpi_bus_type usb_acpi_bus = { .name = "USB", .match = usb_acpi_bus_match, .find_companion = usb_acpi_find_companion, }; int usb_acpi_register(void) { return register_acpi_bus_type(&usb_acpi_bus); } void usb_acpi_unregister(void) { unregister_acpi_bus_type(&usb_acpi_bus); }
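/*
 * Illustrative sketch only (not part of the original file): the
 * check-then-set pattern that the usb_acpi_set_power_state() kernel-doc
 * above asks for, in a hypothetical helper powering a port on or off
 * (index is zero-based).
 */
static int example_port_power(struct usb_device *hdev, int index, bool on)
{
	if (!usb_acpi_power_manageable(hdev, index))
		return -ENODEV;	/* no ACPI power resource for this port */

	return usb_acpi_set_power_state(hdev, index, on);
}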
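/*
 * Illustrative sketch, not part of the driver above: the shape of the
 * consumer/supplier relationship that usb_acpi_add_usb4_devlink() sets
 * up for a tunneled USB3 device. The device pointers and the helper
 * name are placeholders; the flags are the ones used by the driver.
 * DL_FLAG_STATELESS leaves the link's lifetime to the caller,
 * DL_FLAG_PM_RUNTIME makes runtime PM of the consumer keep the
 * supplier (the USB4 NHI) powered, and DL_FLAG_RPM_ACTIVE resumes the
 * supplier as the link is created.
 */
static int sketch_link_tunneled_device(struct device *usb3_dev,
				       struct device *nhi_dev)
{
	struct device_link *link;

	link = device_link_add(usb3_dev, nhi_dev,
			       DL_FLAG_STATELESS | DL_FLAG_RPM_ACTIVE |
			       DL_FLAG_PM_RUNTIME);
	return link ? 0 : -EINVAL;
}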
// SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/types.h> #include <linux/export.h> #include <linux/init.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/icmp.h> #include <linux/icmpv6.h> #include <linux/dccp.h> #include <linux/sctp.h> #include <net/sctp/checksum.h> #include <linux/netfilter.h> #include <net/netfilter/nf_nat.h> #include <linux/ipv6.h> #include <linux/netfilter_ipv6.h> #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/xfrm.h> #include <net/ipv6.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack.h> #include <linux/netfilter/nfnetlink_conntrack.h> static void nf_csum_update(struct sk_buff *skb, unsigned int iphdroff, __sum16 *check, const struct nf_conntrack_tuple *t, enum nf_nat_manip_type maniptype); static void __udp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, struct udphdr *hdr, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, bool do_csum) { __be16 *portptr, newport; if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src port */ newport = tuple->src.u.udp.port; portptr = &hdr->source; } else { /* Get rid of dst port */ newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } if (do_csum) { nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; } *portptr = newport; } static bool udp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct udphdr *hdr; if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) return false; hdr = (struct udphdr *)(skb->data + hdroff); __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check); return true; } static bool udplite_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { #ifdef CONFIG_NF_CT_PROTO_UDPLITE struct udphdr *hdr; if (skb_ensure_writable(skb, hdroff +
sizeof(*hdr))) return false; hdr = (struct udphdr *)(skb->data + hdroff); __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true); #endif return true; } static bool sctp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { #ifdef CONFIG_NF_CT_PROTO_SCTP struct sctphdr *hdr; int hdrsize = 8; /* This could be an inner header returned in icmp packet; in such * cases we cannot update the checksum field since it is outside * of the 8 bytes of transport layer headers we are guaranteed. */ if (skb->len >= hdroff + sizeof(*hdr)) hdrsize = sizeof(*hdr); if (skb_ensure_writable(skb, hdroff + hdrsize)) return false; hdr = (struct sctphdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src port */ hdr->source = tuple->src.u.sctp.port; } else { /* Get rid of dst port */ hdr->dest = tuple->dst.u.sctp.port; } if (hdrsize < sizeof(*hdr)) return true; if (skb->ip_summed != CHECKSUM_PARTIAL) { hdr->checksum = sctp_compute_cksum(skb, hdroff); skb->ip_summed = CHECKSUM_NONE; } #endif return true; } static bool tcp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct tcphdr *hdr; __be16 *portptr, newport, oldport; int hdrsize = 8; /* TCP connection tracking guarantees this much */ /* this could be an inner header returned in icmp packet; in such cases we cannot update the checksum field since it is outside of the 8 bytes of transport layer headers we are guaranteed */ if (skb->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); if (skb_ensure_writable(skb, hdroff + hdrsize)) return false; hdr = (struct tcphdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src port */ newport = tuple->src.u.tcp.port; portptr = &hdr->source; } else { /* Get rid of dst port */ newport = tuple->dst.u.tcp.port; portptr = &hdr->dest; } oldport = *portptr; *portptr = newport; if (hdrsize < sizeof(*hdr)) return true; nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); return true; } static bool dccp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { #ifdef CONFIG_NF_CT_PROTO_DCCP struct dccp_hdr *hdr; __be16 *portptr, oldport, newport; int hdrsize = 8; /* DCCP connection tracking guarantees this much */ if (skb->len >= hdroff + sizeof(struct dccp_hdr)) hdrsize = sizeof(struct dccp_hdr); if (skb_ensure_writable(skb, hdroff + hdrsize)) return false; hdr = (struct dccp_hdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { newport = tuple->src.u.dccp.port; portptr = &hdr->dccph_sport; } else { newport = tuple->dst.u.dccp.port; portptr = &hdr->dccph_dport; } oldport = *portptr; *portptr = newport; if (hdrsize < sizeof(*hdr)) return true; nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, false); #endif return true; } static bool icmp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct icmphdr *hdr; if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) return false; hdr = (struct icmphdr *)(skb->data + hdroff); switch (hdr->type) { case ICMP_ECHO: case ICMP_ECHOREPLY: case
ICMP_TIMESTAMP: case ICMP_TIMESTAMPREPLY: case ICMP_INFO_REQUEST: case ICMP_INFO_REPLY: case ICMP_ADDRESS: case ICMP_ADDRESSREPLY: break; default: return true; } inet_proto_csum_replace2(&hdr->checksum, skb, hdr->un.echo.id, tuple->src.u.icmp.id, false); hdr->un.echo.id = tuple->src.u.icmp.id; return true; } static bool icmpv6_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct icmp6hdr *hdr; if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) return false; hdr = (struct icmp6hdr *)(skb->data + hdroff); nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype); if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST || hdr->icmp6_type == ICMPV6_ECHO_REPLY) { inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, hdr->icmp6_identifier, tuple->src.u.icmp.id, false); hdr->icmp6_identifier = tuple->src.u.icmp.id; } return true; } /* manipulate a GRE packet according to maniptype */ static bool gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) const struct gre_base_hdr *greh; struct pptp_gre_header *pgreh; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8)) return false; greh = (void *)skb->data + hdroff; pgreh = (struct pptp_gre_header *)greh; /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype != NF_NAT_MANIP_DST) return true; switch (greh->flags & GRE_VERSION) { case GRE_VERSION_0: /* We do not currently NAT any GREv0 packets. * Try to behave like "nf_nat_proto_unknown" */ break; case GRE_VERSION_1: pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; default: pr_debug("can't nat unknown GRE version\n"); return false; } #endif return true; } static bool l4proto_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { switch (tuple->dst.protonum) { case IPPROTO_TCP: return tcp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_UDP: return udp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_UDPLITE: return udplite_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_SCTP: return sctp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_ICMP: return icmp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_ICMPV6: return icmpv6_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_DCCP: return dccp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_GRE: return gre_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); } /* If we don't know protocol -- no error, pass it unmodified. 
*/ return true; } static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) { struct iphdr *iph; unsigned int hdroff; if (skb_ensure_writable(skb, iphdroff + sizeof(*iph))) return false; iph = (void *)skb->data + iphdroff; hdroff = iphdroff + iph->ihl * 4; if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype)) return false; iph = (void *)skb->data + iphdroff; if (maniptype == NF_NAT_MANIP_SRC) { csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); iph->saddr = target->src.u3.ip; } else { csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); iph->daddr = target->dst.u3.ip; } return true; } static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) { #if IS_ENABLED(CONFIG_IPV6) struct ipv6hdr *ipv6h; __be16 frag_off; int hdroff; u8 nexthdr; if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h))) return false; ipv6h = (void *)skb->data + iphdroff; nexthdr = ipv6h->nexthdr; hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), &nexthdr, &frag_off); if (hdroff < 0) goto manip_addr; if ((frag_off & htons(~0x7)) == 0 && !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype)) return false; /* must reload, offset might have changed */ ipv6h = (void *)skb->data + iphdroff; manip_addr: if (maniptype == NF_NAT_MANIP_SRC) ipv6h->saddr = target->src.u3.in6; else ipv6h->daddr = target->dst.u3.in6; #endif return true; } unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, enum nf_nat_manip_type mtype, enum ip_conntrack_dir dir) { struct nf_conntrack_tuple target; /* We are aiming to look like the inverse of the other direction. */ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); switch (target.src.l3num) { case NFPROTO_IPV6: if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype)) return NF_ACCEPT; break; case NFPROTO_IPV4: if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype)) return NF_ACCEPT; break; default: WARN_ON_ONCE(1); break; } return NF_DROP; } static void nf_nat_ipv4_csum_update(struct sk_buff *skb, unsigned int iphdroff, __sum16 *check, const struct nf_conntrack_tuple *t, enum nf_nat_manip_type maniptype) { struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); __be32 oldip, newip; if (maniptype == NF_NAT_MANIP_SRC) { oldip = iph->saddr; newip = t->src.u3.ip; } else { oldip = iph->daddr; newip = t->dst.u3.ip; } inet_proto_csum_replace4(check, skb, oldip, newip, true); } static void nf_nat_ipv6_csum_update(struct sk_buff *skb, unsigned int iphdroff, __sum16 *check, const struct nf_conntrack_tuple *t, enum nf_nat_manip_type maniptype) { #if IS_ENABLED(CONFIG_IPV6) const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); const struct in6_addr *oldip, *newip; if (maniptype == NF_NAT_MANIP_SRC) { oldip = &ipv6h->saddr; newip = &t->src.u3.in6; } else { oldip = &ipv6h->daddr; newip = &t->dst.u3.in6; } inet_proto_csum_replace16(check, skb, oldip->s6_addr32, newip->s6_addr32, true); #endif } static void nf_csum_update(struct sk_buff *skb, unsigned int iphdroff, __sum16 *check, const struct nf_conntrack_tuple *t, enum nf_nat_manip_type maniptype) { switch (t->src.l3num) { case NFPROTO_IPV4: nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype); return; case NFPROTO_IPV6: nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype); return; } } static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, u8 proto, void *data, __sum16 *check, int datalen, int
oldlen) { if (skb->ip_summed != CHECKSUM_PARTIAL) { const struct iphdr *iph = ip_hdr(skb); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + ip_hdrlen(skb); skb->csum_offset = (void *)check - data; *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, proto, 0); } else { inet_proto_csum_replace2(check, skb, htons(oldlen), htons(datalen), true); } } #if IS_ENABLED(CONFIG_IPV6) static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, u8 proto, void *data, __sum16 *check, int datalen, int oldlen) { if (skb->ip_summed != CHECKSUM_PARTIAL) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + (data - (void *)skb->data); skb->csum_offset = (void *)check - data; *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, datalen, proto, 0); } else { inet_proto_csum_replace2(check, skb, htons(oldlen), htons(datalen), true); } } #endif void nf_nat_csum_recalc(struct sk_buff *skb, u8 nfproto, u8 proto, void *data, __sum16 *check, int datalen, int oldlen) { switch (nfproto) { case NFPROTO_IPV4: nf_nat_ipv4_csum_recalc(skb, proto, data, check, datalen, oldlen); return; #if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: nf_nat_ipv6_csum_recalc(skb, proto, data, check, datalen, oldlen); return; #endif } WARN_ON_ONCE(1); } int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum) { struct { struct icmphdr icmp; struct iphdr ip; } *inside; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); unsigned int hdrlen = ip_hdrlen(skb); struct nf_conntrack_tuple target; unsigned long statusbit; WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); if (skb_ensure_writable(skb, hdrlen + sizeof(*inside))) return 0; if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP)) return 0; inside = (void *)skb->data + hdrlen; if (inside->icmp.type == ICMP_REDIRECT) { if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) return 0; if (ct->status & IPS_NAT_MASK) return 0; } if (manip == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply direction */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; if (!(ct->status & statusbit)) return 1; if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt may reallocate */ inside = (void *)skb->data + hdrlen; inside->icmp.checksum = 0; inside->icmp.checksum = csum_fold(skb_checksum(skb, hdrlen, skb->len - hdrlen, 0)); } /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); target.dst.protonum = IPPROTO_ICMP; if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; return 1; } EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); static unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; ct = nf_ct_get(skb, &ctinfo); if (!ct) return NF_ACCEPT; if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, state->hook)) return NF_DROP; else return NF_ACCEPT; } } return nf_nat_inet_fn(priv, skb, state); } static unsigned int nf_nat_ipv4_pre_routing(void *priv, struct sk_buff 
*skb, const struct nf_hook_state *state) { unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; ret = nf_nat_ipv4_fn(priv, skb, state); if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); return ret; } #ifdef CONFIG_XFRM static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) { struct sock *sk = skb->sk; struct dst_entry *dst; unsigned int hh_len; struct flowi fl; int err; err = xfrm_decode_session(net, skb, &fl, family); if (err < 0) return err; dst = skb_dst(skb); if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; if (!dst_hold_safe(dst)) return -EHOSTUNREACH; if (sk && !net_eq(net, sock_net(sk))) sk = NULL; dst = xfrm_lookup(net, dst, &fl, sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_drop(skb); skb_dst_set(skb, dst); /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) return -ENOMEM; return 0; } #endif static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport) { enum ip_conntrack_info ctinfo; enum ip_conntrack_dir dir; const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; switch (nf_ct_protonum(ct)) { case IPPROTO_TCP: case IPPROTO_UDP: break; default: return false; } dir = CTINFO2DIR(ctinfo); if (dir != IP_CT_DIR_ORIGINAL) return false; return ct->tuplehash[!dir].tuple.dst.u.all != sport; } static unsigned int nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { __be32 saddr = ip_hdr(skb)->saddr; struct sock *sk = skb->sk; unsigned int ret; ret = nf_nat_ipv4_fn(priv, skb, state); if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) return ret; /* skb has a socket assigned via tcp edemux. We need to check * if nf_nat_ipv4_fn() has mangled the packet in a way that * edemux would not have found this socket. * * This includes both changes to the source address and changes * to the source port, which are both handled by the * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux * might have found a socket for the old (pre-snat) address. 
*/ if (saddr != ip_hdr(skb)->saddr || nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) skb_orphan(skb); /* TCP edemux obtained wrong socket */ return ret; } static unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { #ifdef CONFIG_XFRM const struct nf_conn *ct; enum ip_conntrack_info ctinfo; int err; #endif unsigned int ret; ret = nf_nat_ipv4_fn(priv, skb, state); #ifdef CONFIG_XFRM if (ret != NF_ACCEPT) return ret; if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return ret; ct = nf_ct_get(skb, &ctinfo); if (ct) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.u3.ip != ct->tuplehash[!dir].tuple.dst.u3.ip || (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } } #endif return ret; } static unsigned int nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int ret; int err; ret = nf_nat_ipv4_fn(priv, skb, state); if (ret != NF_ACCEPT) return ret; ct = nf_ct_get(skb, &ctinfo); if (ct) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } #ifdef CONFIG_XFRM else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } #endif } return ret; } static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_pre_routing, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, /* After packet filtering, change source */ { .hook = nf_nat_ipv4_out, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_local_fn, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, /* After packet filtering, change source */ { .hook = nf_nat_ipv4_local_in, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, }, }; int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) { return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn); void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn); #if IS_ENABLED(CONFIG_IPV6) int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, unsigned int hdrlen) { struct { struct icmp6hdr icmp6; struct ipv6hdr ip6; } *inside; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); struct nf_conntrack_tuple target; unsigned long statusbit; WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); if (skb_ensure_writable(skb, hdrlen + sizeof(*inside))) return 0; if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) return 0; inside = (void 
*)skb->data + hdrlen; if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) return 0; if (ct->status & IPS_NAT_MASK) return 0; } if (manip == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply direction */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; if (!(ct->status & statusbit)) return 1; if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); inside = (void *)skb->data + hdrlen; inside->icmp6.icmp6_cksum = 0; inside->icmp6.icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len - hdrlen, IPPROTO_ICMPV6, skb_checksum(skb, hdrlen, skb->len - hdrlen, 0)); } nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); target.dst.protonum = IPPROTO_ICMPV6; if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; return 1; } EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); static unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be16 frag_off; int hdrlen; u8 nexthdr; ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. Hence it's the user's responsibility to * packet filter it out, or implement conntrack/NAT for that * protocol. 8) --RR */ if (!ct) return NF_ACCEPT; if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { nexthdr = ipv6_hdr(skb)->nexthdr; hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, state->hook, hdrlen)) return NF_DROP; else return NF_ACCEPT; } } return nf_nat_inet_fn(priv, skb, state); } static unsigned int nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct in6_addr saddr = ipv6_hdr(skb)->saddr; struct sock *sk = skb->sk; unsigned int ret; ret = nf_nat_ipv6_fn(priv, skb, state); if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) return ret; /* see nf_nat_ipv4_local_in */ if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) || nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) skb_orphan(skb); return ret; } static unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret, verdict; struct in6_addr daddr = ipv6_hdr(skb)->daddr; ret = nf_nat_ipv6_fn(priv, skb, state); verdict = ret & NF_VERDICT_MASK; if (verdict != NF_DROP && verdict != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); return ret; } static unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { #ifdef CONFIG_XFRM const struct nf_conn *ct; enum ip_conntrack_info ctinfo; int err; #endif unsigned int ret; ret = nf_nat_ipv6_fn(priv, skb, state); #ifdef CONFIG_XFRM if (ret != NF_ACCEPT) return ret; if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return ret; ct = nf_ct_get(skb, &ctinfo); if (ct) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3) || (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } } #endif return ret; } static
unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int ret; int err; ret = nf_nat_ipv6_fn(priv, skb, state); if (ret != NF_ACCEPT) return ret; ct = nf_ct_get(skb, &ctinfo); if (ct) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { err = nf_ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } #ifdef CONFIG_XFRM else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } #endif } return ret; } static const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, /* After packet filtering, change source */ { .hook = nf_nat_ipv6_out, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_local_fn, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, /* After packet filtering, change source */ { .hook = nf_nat_ipv6_local_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, }, }; int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) { return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn); #endif /* CONFIG_IPV6 */ #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT) int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops) { int ret; if (WARN_ON_ONCE(ops->pf != NFPROTO_INET)) return -EINVAL; ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); if (ret) return ret; ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); if (ret) nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); return ret; } EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn); void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn); #endif /* NFT INET NAT */
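/*
 * Illustrative sketch, not part of the file above: the arithmetic that
 * inet_proto_csum_replace2() and friends perform when the NAT code
 * rewrites a 16-bit field (such as a port). Per RFC 1624, when word m
 * in the checksummed data becomes m', the ones'-complement checksum HC
 * can be updated incrementally as HC' = ~(~HC + ~m + m') instead of
 * being recomputed over the whole packet. sketch_csum_replace2() is a
 * stand-alone restatement of that identity; the name is a placeholder.
 */
static u16 sketch_csum_replace2(u16 check, u16 old_val, u16 new_val)
{
	u32 sum = (u16)~check;		/* ~HC */

	sum += (u16)~old_val;		/* + ~m  */
	sum += new_val;			/* + m'  */
	/* fold carries back in: ones'-complement (end-around carry) add */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (u16)~sum;		/* HC' */
}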
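/*
 * Illustrative sketch, not part of the file above: why the ICMP error
 * translators above flip "statusbit" with IPS_NAT_MASK in the reply
 * direction. IPS_NAT_MASK is IPS_SRC_NAT | IPS_DST_NAT, so XOR-ing a
 * value holding exactly one of those bits swaps it for the other:
 * source NAT applied in the original direction must be undone as
 * destination NAT on replies, and vice versa. The helper name is a
 * placeholder.
 */
static unsigned long sketch_icmp_statusbit(enum nf_nat_manip_type manip,
					   enum ip_conntrack_dir dir)
{
	unsigned long statusbit = (manip == NF_NAT_MANIP_SRC) ?
				  IPS_SRC_NAT : IPS_DST_NAT;

	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;	/* swap SRC <-> DST */
	return statusbit;
}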
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * These functions handle all input from the IP layer into SCTP. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P.
Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Xingang Guo <xingang.guo@intel.com> * Jon Grimm <jgrimm@us.ibm.com> * Hui Huang <hui.huang@nokia.com> * Daisy Chang <daisyc@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> */ #include <linux/types.h> #include <linux/list.h> /* For struct list_head */ #include <linux/socket.h> #include <linux/ip.h> #include <linux/time.h> /* For struct timeval */ #include <linux/slab.h> #include <net/ip.h> #include <net/icmp.h> #include <net/snmp.h> #include <net/sock.h> #include <net/xfrm.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/checksum.h> #include <net/net_namespace.h> #include <linux/rhashtable.h> #include <net/sock_reuseport.h> /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, struct sctp_transport **transportp, int dif, int sdif); static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, const union sctp_addr *daddr, int dif, int sdif); static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt, int dif, int sdif); static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) { struct sctphdr *sh = sctp_hdr(skb); __le32 cmp = sh->checksum; __le32 val = sctp_compute_cksum(skb, 0); if (val != cmp) { /* CRC failure, dump it. */ __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; } /* * This is the routine which IP calls when receiving an SCTP packet. */ int sctp_rcv(struct sk_buff *skb) { struct sock *sk; struct sctp_association *asoc; struct sctp_endpoint *ep = NULL; struct sctp_ep_common *rcvr; struct sctp_transport *transport = NULL; struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb); int dif, sdif; if (skb->pkt_type != PACKET_HOST) goto discard_it; __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS); /* If packet is too small to contain a single chunk, let's not * waste time on it anymore. */ if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + skb_transport_offset(skb)) goto discard_it; /* If the packet is fragmented and we need to do crc checking, * it's better to just linearize it otherwise crc computing * takes longer. */ if ((!is_gso && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; /* Pull up the IP header. */ __skb_pull(skb, skb_transport_offset(skb)); skb->csum_valid = 0; /* Previous value not applicable */ if (skb_csum_unnecessary(skb)) __skb_decr_checksum_unnecessary(skb); else if (!sctp_checksum_disable && !is_gso && sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb->csum_valid = 1; __skb_pull(skb, sizeof(struct sctphdr)); family = ipver2af(ip_hdr(skb)->version); af = sctp_get_af_specific(family); if (unlikely(!af)) goto discard_it; SCTP_INPUT_CB(skb)->af = af; /* Initialize local addresses for lookups. 
*/ af->from_skb(&src, skb, 1); af->from_skb(&dest, skb, 0); dif = af->skb_iif(skb); sdif = af->skb_sdif(skb); /* If the packet is to or from a non-unicast address, * silently discard the packet. * * This is not clearly defined in the RFC except in section * 8.4 - OOTB handling. However, based on the book "Stream Control * Transmission Protocol" 2.1, "It is important to note that the * IP address of an SCTP transport address must be a routable * unicast address. In other words, IP multicast addresses and * IP broadcast addresses cannot be used in an SCTP transport * address." */ if (!af->addr_valid(&src, NULL, skb) || !af->addr_valid(&dest, NULL, skb)) goto discard_it; asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport, dif, sdif); if (!asoc) ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src, dif, sdif); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; sk = rcvr->sk; /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * An SCTP packet is called an "out of the blue" (OOTB) * packet if it is correctly formed, i.e., passed the * receiver's checksum check, but the receiver is not * able to identify the association to which this * packet belongs. */ if (!asoc) { if (sctp_rcv_ootb(skb)) { __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; nf_reset_ct(skb); if (sk_filter(sk, skb)) goto discard_release; /* Create an SCTP packet structure. */ chunk = sctp_chunkify(skb, asoc, sk, GFP_ATOMIC); if (!chunk) goto discard_release; SCTP_INPUT_CB(skb)->chunk = chunk; /* Remember what endpoint is to handle this packet. */ chunk->rcvr = rcvr; /* Remember the SCTP header. */ chunk->sctp_hdr = sctp_hdr(skb); /* Set the source and destination addresses of the incoming chunk. */ sctp_init_addrs(chunk, &src, &dest); /* Remember where we came from. */ chunk->transport = transport; /* Acquire access to the sock lock. Note: We are safe from other * bottom halves on this lock, but a user may be in the lock too, * so check if it is busy. */ bh_lock_sock(sk); if (sk != rcvr->sk) { /* Our cached sk is different from the rcvr->sk. This is * because migrate()/accept() may have moved the association * to a new socket and released all the sockets. So now we * are holding a lock on the old socket while the user may * be doing something with the new socket. Switch our view * of the current sk. */ bh_unlock_sock(sk); sk = rcvr->sk; bh_lock_sock(sk); } if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) { if (sctp_add_backlog(sk, skb)) { bh_unlock_sock(sk); sctp_chunk_free(chunk); skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG); } else { __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. */ if (transport) sctp_transport_put(transport); else sctp_endpoint_put(ep); return 0; discard_it: __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; discard_release: /* Release the asoc/ep ref we took in the lookup calls. */ if (transport) sctp_transport_put(transport); else sctp_endpoint_put(ep); goto discard_it; } /* Process the backlog queue of the socket. Every skb on * the backlog holds a ref on an association or endpoint. * We hold this ref throughout the state machine to make * sure that the structure we need is still around.
*/ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_inq *inqueue = &chunk->rcvr->inqueue; struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = NULL; int backloged = 0; rcvr = chunk->rcvr; /* If the rcvr is dead then the association or endpoint * has been deleted and we can safely drop the chunk * and refs that we are holding. */ if (rcvr->dead) { sctp_chunk_free(chunk); goto done; } if (unlikely(rcvr->sk != sk)) { /* In this case, the association moved from one socket to * another. We are currently sitting on the backlog of the * old socket, so we need to move. * However, since we are here in the process context we * need to make sure that the user doesn't own * the new socket when we process the packet. * If the new socket is user-owned, queue the chunk to the * backlog of the new socket without dropping any refs. * Otherwise, we can safely push the chunk on the inqueue. */ sk = rcvr->sk; local_bh_disable(); bh_lock_sock(sk); if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) { if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) sctp_chunk_free(chunk); else backloged = 1; } else sctp_inq_push(inqueue, chunk); bh_unlock_sock(sk); local_bh_enable(); /* If the chunk was backlogged again, don't drop refs */ if (backloged) return 0; } else { if (!sctp_newsk_ready(sk)) { if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) return 0; sctp_chunk_free(chunk); } else { sctp_inq_push(inqueue, chunk); } } done: /* Release the refs we took in sctp_add_backlog */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) sctp_transport_put(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_put(sctp_ep(rcvr)); else BUG(); return 0; } static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = chunk->rcvr; int ret; ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. * This way, we know structures we need will not disappear * from us */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) sctp_transport_hold(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_hold(sctp_ep(rcvr)); else BUG(); } return ret; } /* Handle icmp frag needed error. */ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { if (!t || (t->pathmtu <= pmtu && t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu)) return; if (sock_owned_by_user(sk)) { atomic_set(&t->mtu_info, pmtu); asoc->pmtu_pending = 1; t->pmtu_pending = 1; return; } if (!(t->param_flags & SPP_PMTUD_ENABLE)) /* We can't allow retransmitting in such a case, as the * retransmission would be sized just as before, and thus we * would get another icmp, and retransmit again. */ return; /* Update the transport's view of the MTU. Return if no update was needed. * If an update wasn't needed/possible, it also doesn't make sense to * try to retransmit now. */ if (!sctp_transport_update_pmtu(t, pmtu)) return; /* Update association pmtu. */ sctp_assoc_sync_pmtu(asoc); /* Retransmit with the new pmtu setting.
*/ sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, struct sk_buff *skb) { struct dst_entry *dst; if (sock_owned_by_user(sk) || !t) return; dst = sctp_transport_dst_check(t); if (dst) dst->ops->redirect(dst, sk, skb); } /* * SCTP Implementer's Guide, 2.37 ICMP handling procedures * * ICMP8) If the ICMP code is an "Unrecognized next header type encountered" * or a "Protocol Unreachable", treat this message as an abort * with the T bit set. * * This function sends an event to the state machine, which will abort the * association. * */ void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t) { if (sock_owned_by_user(sk)) { if (timer_pending(&t->proto_unreach_timer)) return; else { if (!mod_timer(&t->proto_unreach_timer, jiffies + (HZ/20))) sctp_transport_hold(t); } } else { struct net *net = sock_net(sk); pr_debug("%s: unrecognized next header type " "encountered!\n", __func__); if (del_timer(&t->proto_unreach_timer)) sctp_transport_put(t); sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, t, GFP_ATOMIC); } } /* Common lookup code for icmp/icmpv6 error handler. */ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctphdr *sctphdr, struct sctp_association **app, struct sctp_transport **tpp) { struct sctp_init_chunk *chunkhdr, _chunkhdr; union sctp_addr saddr; union sctp_addr daddr; struct sctp_af *af; struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; __u32 vtag = ntohl(sctphdr->vtag); int sdif = inet_sdif(skb); int dif = inet_iif(skb); *app = NULL; *tpp = NULL; af = sctp_get_af_specific(family); if (unlikely(!af)) { return NULL; } /* Initialize local addresses for lookups. */ af->from_skb(&saddr, skb, 1); af->from_skb(&daddr, skb, 0); /* Look for an association that matches the incoming ICMP error * packet. */ asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport, dif, sdif); if (!asoc) return NULL; sk = asoc->base.sk; /* RFC 4960, Appendix C. ICMP Handling * * ICMP6) An implementation MUST validate that the Verification Tag * contained in the ICMP message matches the Verification Tag of * the peer. If the Verification Tag is not 0 and does NOT * match, discard the ICMP message. If it is 0 and the ICMP * message contains enough bytes to verify that the chunk type is * an INIT chunk and that the Initiate Tag matches the tag of the * peer, continue with ICMP7. If the ICMP message is too short * or the chunk type or the Initiate Tag does not match, silently * discard the packet. */ if (vtag == 0) { /* chunk header + first 4 octets of init header */ chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + sizeof(struct sctphdr), sizeof(struct sctp_chunkhdr) + sizeof(__be32), &_chunkhdr); if (!chunkhdr || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) goto out; } else if (vtag != asoc->c.peer_vtag) { goto out; } bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; return sk; out: sctp_transport_put(transport); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler.
*/ void sctp_err_finish(struct sock *sk, struct sctp_transport *t) __releases(&((__sk)->sk_lock.slock)) { bh_unlock_sock(sk); sctp_transport_put(t); } static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb, __u8 type, __u8 code, __u32 info) { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; int err = 0; switch (type) { case ICMP_PARAMETERPROB: err = EPROTO; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) return; if (code == ICMP_FRAG_NEEDED) { sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info)); return; } if (code == ICMP_PROT_UNREACH) { sctp_icmp_proto_unreachable(sk, asoc, t); return; } err = icmp_err_convert[code].errno; break; case ICMP_TIME_EXCEEDED: if (code == ICMP_EXC_FRAGTIME) return; err = EHOSTUNREACH; break; case ICMP_REDIRECT: sctp_icmp_redirect(sk, t, skb); return; default: return; } if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) { sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ WRITE_ONCE(sk->sk_err_soft, err); } } /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should * be closed and the error returned to the user. If err > 0 * it's just the icmp type << 8 | icmp code. After adjustment * header points to the first 8 bytes of the sctp header. We need * to find the appropriate port. * * The locking strategy used here is very "optimistic". When * someone else accesses the socket the ICMP is just dropped * and for some paths there is no check at all. * A more general error queue to queue errors for later handling * is probably better. * */ int sctp_v4_err(struct sk_buff *skb, __u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); struct sctp_transport *transport; struct sctp_association *asoc; __u16 saveip, savesctp; struct sock *sk; /* Fix up skb to look at the embedded net header. */ saveip = skb->network_header; savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, iph->ihl * 4); sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport); /* Put back the original values. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; } sctp_v4_err_handle(transport, skb, type, code, info); sctp_err_finish(sk, transport); return 0; } int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct sctp_association *asoc; struct sctp_transport *t; struct icmphdr *hdr; __u32 info = 0; skb->transport_header += sizeof(struct udphdr); sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; } skb->transport_header -= sizeof(struct udphdr); hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr)); if (hdr->type == ICMP_REDIRECT) { /* can't be handled without outer iphdr known, leave it to udp_err */ sctp_err_finish(sk, t); return 0; } if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED) info = ntohs(hdr->un.frag.mtu); sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info); sctp_err_finish(sk, t); return 1; } /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * * This function scans all the chunks in the OOTB packet to determine if * the packet should be discarded right away.
If a response might be needed * for this packet, or, if further processing is possible, the packet will * be queued to a proper inqueue for the next phase of handling. * * Output: * Return 0 - If further processing is needed. * Return 1 - If the packet can be discarded right away. */ static int sctp_rcv_ootb(struct sk_buff *skb) { struct sctp_chunkhdr *ch, _ch; int ch_end, offset = 0; /* Scan through all the chunks in the packet. */ do { /* Make sure we have at least the header there */ if (offset + sizeof(_ch) > skb->len) break; ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); /* Break out if chunk length is less than minimal. */ if (!ch || ntohs(ch->length) < sizeof(_ch)) break; ch_end = offset + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb->len) break; /* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the * receiver MUST silently discard the OOTB packet and take no * further action. */ if (SCTP_CID_ABORT == ch->type) goto discard; /* RFC 8.4, 6) If the packet contains a SHUTDOWN COMPLETE * chunk, the receiver should silently discard the packet * and take no further action. */ if (SCTP_CID_SHUTDOWN_COMPLETE == ch->type) goto discard; /* RFC 4460, 2.11.2 * This will discard packets with INIT chunk bundled as * subsequent chunks in the packet. When INIT is first, * the normal INIT processing will discard the chunk. */ if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data) goto discard; offset = ch_end; } while (ch_end < skb->len); return 0; discard: return 1; } /* Insert endpoint into the hash table. */ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) { struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; int err = 0; ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; write_lock(&head->lock); if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); struct sctp_endpoint *ep2; struct list_head *list; int cnt = 0; err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; sctp_for_each_hentry(ep2, &head->chain) { struct sock *sk2 = ep2->base.sk; if (!net_eq(sock_net(sk2), net) || sk2 == sk || !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) || !sk2->sk_reuseport) continue; err = sctp_bind_addrs_check(sctp_sk(sk2), sctp_sk(sk), cnt); if (!err) { err = reuseport_add_sock(sk, sk2, any); if (err) goto out; break; } else if (err < 0) { goto out; } } if (err) { err = reuseport_alloc(sk, any); if (err) goto out; } } hlist_add_head(&ep->node, &head->chain); out: write_unlock(&head->lock); return err; } /* Add an endpoint to the hash. Local BH-safe. */ int sctp_hash_endpoint(struct sctp_endpoint *ep) { int err; local_bh_disable(); err = __sctp_hash_endpoint(ep); local_bh_enable(); return err; } /* Remove endpoint from the hash table. */ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) { struct sock *sk = ep->base.sk; struct sctp_hashbucket *head; ep->hashent = sctp_ep_hashfn(sock_net(sk), ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; write_lock(&head->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); hlist_del_init(&ep->node); write_unlock(&head->lock); } /* Remove endpoint from the hash. Local BH-safe.
*/ void sctp_unhash_endpoint(struct sctp_endpoint *ep) { local_bh_disable(); __sctp_unhash_endpoint(ep); local_bh_enable(); } static inline __u32 sctp_hashfn(const struct net *net, __be16 lport, const union sctp_addr *paddr, __u32 seed) { __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else addr = (__force __u32)paddr->v4.sin_addr.s_addr; return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } /* Look up an endpoint. */ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, const union sctp_addr *paddr, int dif, int sdif) { struct sctp_hashbucket *head; struct sctp_endpoint *ep; struct sock *sk; __be16 lport; int hash; lport = laddr->v4.sin_port; hash = sctp_ep_hashfn(net, ntohs(lport)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(ep, &head->chain) { if (sctp_endpoint_is_match(ep, net, laddr, dif, sdif)) goto hit; } ep = sctp_sk(net->sctp.ctl_sock)->ep; hit: sk = ep->base.sk; if (sk->sk_reuseport) { __u32 phash = sctp_hashfn(net, lport, paddr, 0); sk = reuseport_select_sock(sk, phash, skb, sizeof(struct sctphdr)); if (sk) ep = sctp_sk(sk)->ep; } sctp_endpoint_hold(ep); read_unlock(&head->lock); return ep; } /* rhashtable for transport */ struct sctp_hash_cmp_arg { const union sctp_addr *paddr; const struct net *net; __be16 lport; }; static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { struct sctp_transport *t = (struct sctp_transport *)ptr; const struct sctp_hash_cmp_arg *x = arg->key; int err = 1; if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) return err; if (!sctp_transport_hold(t)) return err; if (!net_eq(t->asoc->base.net, x->net)) goto out; if (x->lport != htons(t->asoc->base.bind_addr.port)) goto out; err = 0; out: sctp_transport_put(t); return err; } static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; return sctp_hashfn(t->asoc->base.net, htons(t->asoc->base.bind_addr.port), &t->ipaddr, seed); } static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed) { const struct sctp_hash_cmp_arg *x = data; return sctp_hashfn(x->net, x->lport, x->paddr, seed); } static const struct rhashtable_params sctp_hash_params = { .head_offset = offsetof(struct sctp_transport, node), .hashfn = sctp_hash_key, .obj_hashfn = sctp_hash_obj, .obj_cmpfn = sctp_hash_cmp, .automatic_shrinking = true, }; int sctp_transport_hashtable_init(void) { return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params); } void sctp_transport_hashtable_destroy(void) { rhltable_destroy(&sctp_transport_hashtable); } int sctp_hash_transport(struct sctp_transport *t) { struct sctp_transport *transport; struct rhlist_head *tmp, *list; struct sctp_hash_cmp_arg arg; int err; if (t->asoc->temp) return 0; arg.net = t->asoc->base.net; arg.paddr = &t->ipaddr; arg.lport = htons(t->asoc->base.bind_addr.port); rcu_read_lock(); list = rhltable_lookup(&sctp_transport_hashtable, &arg, sctp_hash_params); rhl_for_each_entry_rcu(transport, tmp, list, node) if (transport->asoc->ep == t->asoc->ep) { rcu_read_unlock(); return -EEXIST; } rcu_read_unlock(); err = rhltable_insert_key(&sctp_transport_hashtable, &arg, &t->node, sctp_hash_params); if (err) pr_err_once("insert transport fail, errno %d\n", err); return err; } void sctp_unhash_transport(struct sctp_transport *t) { if (t->asoc->temp) return; 
rhltable_remove(&sctp_transport_hashtable, &t->node, sctp_hash_params); } bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { bool l3mdev_accept = true; #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) l3mdev_accept = !!READ_ONCE(net->sctp.l3mdev_accept); #endif return inet_bound_dev_eq(l3mdev_accept, bound_dev_if, dif, sdif); } /* return a transport with holding it */ struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, int dif, int sdif) { struct rhlist_head *tmp, *list; struct sctp_transport *t; int bound_dev_if; struct sctp_hash_cmp_arg arg = { .paddr = paddr, .net = net, .lport = laddr->v4.sin_port, }; list = rhltable_lookup(&sctp_transport_hashtable, &arg, sctp_hash_params); rhl_for_each_entry_rcu(t, tmp, list, node) { if (!sctp_transport_hold(t)) continue; bound_dev_if = READ_ONCE(t->asoc->base.sk->sk_bound_dev_if); if (sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) && sctp_bind_addr_match(&t->asoc->base.bind_addr, laddr, sctp_sk(t->asoc->base.sk))) return t; sctp_transport_put(t); } return NULL; } /* return a transport without holding it, as it's only used under sock lock */ struct sctp_transport *sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr) { struct rhlist_head *tmp, *list; struct sctp_transport *t; struct sctp_hash_cmp_arg arg = { .paddr = paddr, .net = ep->base.net, .lport = htons(ep->base.bind_addr.port), }; list = rhltable_lookup(&sctp_transport_hashtable, &arg, sctp_hash_params); rhl_for_each_entry_rcu(t, tmp, list, node) if (ep == t->asoc->ep) return t; return NULL; } /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt, int dif, int sdif) { struct sctp_transport *t; struct sctp_association *asoc = NULL; t = sctp_addrs_lookup_transport(net, local, peer, dif, sdif); if (!t) goto out; asoc = t->asoc; *pt = t; out: return asoc; } /* Look up an association. protected by RCU read lock */ static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp, int dif, int sdif) { struct sctp_association *asoc; rcu_read_lock(); asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); rcu_read_unlock(); return asoc; } /* Is there an association matching the given local and peer addresses? */ bool sctp_has_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, int dif, int sdif) { struct sctp_transport *transport; if (sctp_lookup_association(net, laddr, paddr, &transport, dif, sdif)) { sctp_transport_put(transport); return true; } return false; } /* * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. * * D) When searching for a matching TCB upon reception of an INIT * or INIT-ACK chunk the receiver SHOULD use not only the * source address of the packet (containing the INIT or * INIT-ACK) but the receiver SHOULD also use all valid * address parameters contained within the chunk. * * 2.18.3 Solution description * * This new text clearly specifies to an implementor the need * to look within the INIT or INIT-ACK. Any implementation that * does not do this, may not be able to establish associations * in certain circumstances. 
* */ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp, int dif, int sdif) { struct sctp_association *asoc; union sctp_addr addr; union sctp_addr *paddr = &addr; struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; struct sctp_init_chunk *init; struct sctp_af *af; /* * This code will NOT touch anything inside the chunk--it is * strictly READ-ONLY. * * RFC 2960 3 SCTP packet Format * * Multiple chunks can be bundled into one SCTP packet up to * the MTU size, except for the INIT, INIT ACK, and SHUTDOWN * COMPLETE chunks. These chunks MUST NOT be bundled with any * other chunk in a packet. See Section 6.10 for more details * on chunk bundling. */ /* Find the start of the TLVs and the end of the chunk. This is * the region we search for address parameters. */ init = (struct sctp_init_chunk *)skb->data; /* Walk the parameters looking for embedded addresses. */ sctp_walk_params(params, init) { /* Note: Ignoring hostname addresses. */ af = sctp_get_af_specific(param_type2af(params.p->type)); if (!af) continue; if (!af->from_addr_param(paddr, params.addr, sh->source, 0)) continue; asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); if (asoc) return asoc; } return NULL; } /* ADD-IP, Section 5.2 * When an endpoint receives an ASCONF Chunk from the remote peer * special procedures may be needed to identify the association the * ASCONF Chunk is associated with. To properly find the association * the following procedures SHOULD be followed: * * D2) If the association is not found, use the address found in the * Address Parameter TLV combined with the port number found in the * SCTP common header. If found proceed to rule D4. * * D2-ext) If more than one ASCONF Chunks are packed together, use the * address found in the ASCONF Address Parameter TLV of each of the * subsequent ASCONF Chunks. If found, proceed to rule D4. */ static struct sctp_association *__sctp_rcv_asconf_lookup( struct net *net, struct sctp_chunkhdr *ch, const union sctp_addr *laddr, __be16 peer_port, struct sctp_transport **transportp, int dif, int sdif) { struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch; struct sctp_af *af; union sctp_addr_param *param; union sctp_addr paddr; if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr)) return NULL; /* Skip over the ADDIP header and find the Address parameter */ param = (union sctp_addr_param *)(asconf + 1); af = sctp_get_af_specific(param_type2af(param->p.type)); if (unlikely(!af)) return NULL; if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; return __sctp_lookup_association(net, laddr, &paddr, transportp, dif, sdif); } /* SCTP-AUTH, Section 6.3: * If the receiver does not find a STCB for a packet containing an AUTH * chunk as the first chunk and not a COOKIE-ECHO chunk as the second * chunk, it MUST use the chunks after the AUTH chunk to look up an existing * association. * * This means that any chunks that can help us identify the association need * to be looked at to find this association. */ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp, int dif, int sdif) { struct sctp_association *asoc = NULL; struct sctp_chunkhdr *ch; int have_auth = 0; unsigned int chunk_num = 1; __u8 *ch_end; /* Walk through the chunks looking for AUTH or ASCONF chunks * to help us find the association. 
*/ ch = (struct sctp_chunkhdr *)skb->data; do { /* Break out if chunk length is less then minimal. */ if (ntohs(ch->length) < sizeof(*ch)) break; ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) break; switch (ch->type) { case SCTP_CID_AUTH: have_auth = chunk_num; break; case SCTP_CID_COOKIE_ECHO: /* If a packet arrives containing an AUTH chunk as * a first chunk, a COOKIE-ECHO chunk as the second * chunk, and possibly more chunks after them, and * the receiver does not have an STCB for that * packet, then authentication is based on * the contents of the COOKIE- ECHO chunk. */ if (have_auth == 1 && chunk_num == 2) return NULL; break; case SCTP_CID_ASCONF: if (have_auth || net->sctp.addip_noauth) asoc = __sctp_rcv_asconf_lookup( net, ch, laddr, sctp_hdr(skb)->source, transportp, dif, sdif); break; default: break; } if (asoc) break; ch = (struct sctp_chunkhdr *)ch_end; chunk_num++; } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb)); return asoc; } /* * There are circumstances when we need to look inside the SCTP packet * for information to help us find the association. Examples * include looking inside of INIT/INIT-ACK chunks or after the AUTH * chunks. */ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp, int dif, int sdif) { struct sctp_chunkhdr *ch; /* We do not allow GSO frames here as we need to linearize and * then cannot guarantee frame boundaries. This shouldn't be an * issue as packets hitting this are mostly INIT or INIT-ACK and * those cannot be on GSO-style anyway. */ if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return NULL; ch = (struct sctp_chunkhdr *)skb->data; /* The code below will attempt to walk the chunk and extract * parameter information. Before we do that, we need to verify * that the chunk length doesn't cause overflow. Otherwise, we'll * walk off the end. */ if (SCTP_PAD4(ntohs(ch->length)) > skb->len) return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK) return __sctp_rcv_init_lookup(net, skb, laddr, transportp, dif, sdif); return __sctp_rcv_walk_lookup(net, skb, laddr, transportp, dif, sdif); } /* Lookup an association for an inbound skb. */ static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, struct sctp_transport **transportp, int dif, int sdif) { struct sctp_association *asoc; asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); if (asoc) goto out; /* Further lookup for INIT/INIT-ACK packets. * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. */ asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp, dif, sdif); if (asoc) goto out; if (paddr->sa.sa_family == AF_INET) pr_debug("sctp: asoc not found for src:%pI4:%d dst:%pI4:%d\n", &laddr->v4.sin_addr, ntohs(laddr->v4.sin_port), &paddr->v4.sin_addr, ntohs(paddr->v4.sin_port)); else pr_debug("sctp: asoc not found for src:%pI6:%d dst:%pI6:%d\n", &laddr->v6.sin6_addr, ntohs(laddr->v6.sin6_port), &paddr->v6.sin6_addr, ntohs(paddr->v6.sin6_port)); out: return asoc; }
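The OOTB scan and the AUTH/ASCONF walk above share one TLV-walking pattern: read a chunk header, validate its length against the buffer, then advance by the length rounded up to a 4-byte boundary. Below is a minimal standalone sketch of that pattern — userspace C, not kernel code; the struct mirrors the layout of struct sctp_chunkhdr and PAD4() stands in for SCTP_PAD4.

/* Standalone sketch (assumption: userspace, not the kernel helpers above):
 * walking TLV-style SCTP chunk headers with 4-byte padding, mirroring the
 * bounds checks performed by sctp_rcv_ootb() and __sctp_rcv_walk_lookup().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct chunkhdr {          /* same layout as struct sctp_chunkhdr */
	uint8_t  type;
	uint8_t  flags;
	uint16_t length;   /* network byte order, includes this header */
};

#define PAD4(len) (((len) + 3) & ~3u)  /* same rounding as SCTP_PAD4 */

static void walk_chunks(const uint8_t *buf, size_t len)
{
	size_t offset = 0;

	while (offset + sizeof(struct chunkhdr) <= len) {
		struct chunkhdr ch;
		size_t clen, ch_end;

		memcpy(&ch, buf + offset, sizeof(ch));
		clen = ntohs(ch.length);

		/* Reject runt chunks: length must cover the header. */
		if (clen < sizeof(ch))
			break;

		/* Chunks are padded to a 4-byte boundary on the wire. */
		ch_end = offset + PAD4(clen);
		if (ch_end > len)
			break;          /* truncated chunk, stop walking */

		printf("chunk type %u, length %zu\n",
		       (unsigned)ch.type, clen);
		offset = ch_end;
	}
}

int main(void)
{
	/* Two chunks: type 1, len 6 (padded to 8); type 4, len 4. */
	uint8_t pkt[12] = { 1, 0, 0, 6, 0xaa, 0xbb, 0, 0,
			    4, 0, 0, 4 };

	walk_chunks(pkt, sizeof(pkt));
	return 0;
}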
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Supervisor Mode Access Prevention support
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: H. Peter Anvin <hpa@linux.intel.com>
 */

#ifndef _ASM_X86_SMAP_H
#define _ASM_X86_SMAP_H

#include <asm/nops.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#ifdef __ASSEMBLY__

#define ASM_CLAC \
	ALTERNATIVE "", "clac", X86_FEATURE_SMAP
#define ASM_STAC \
	ALTERNATIVE "", "stac", X86_FEATURE_SMAP

#else /* __ASSEMBLY__ */

static __always_inline void clac(void)
{
	/* Note: a barrier is implicit in alternative() */
	alternative("", "clac", X86_FEATURE_SMAP);
}

static __always_inline void stac(void)
{
	/* Note: a barrier is implicit in alternative() */
	alternative("", "stac", X86_FEATURE_SMAP);
}

static __always_inline unsigned long smap_save(void)
{
	unsigned long flags;

	asm volatile ("# smap_save\n\t"
		      ALTERNATIVE("", "pushf; pop %0; " "clac" "\n\t",
				  X86_FEATURE_SMAP)
		      : "=rm" (flags) : : "memory", "cc");

	return flags;
}

static __always_inline void smap_restore(unsigned long flags)
{
	asm volatile ("# smap_restore\n\t"
		      ALTERNATIVE("", "push %0; popf\n\t",
				  X86_FEATURE_SMAP)
		      : : "g" (flags) : "memory", "cc");
}

/* These macros can be used in asm() statements */
#define ASM_CLAC \
	ALTERNATIVE("", "clac", X86_FEATURE_SMAP)
#define ASM_STAC \
	ALTERNATIVE("", "stac", X86_FEATURE_SMAP)

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_SMAP_H */
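smap_save()/smap_restore() build on the classic pushf/popf round-trip so the AC flag can be cleared and later restored without knowing its prior state. The runnable userspace sketch below shows only that flags round-trip (an assumption-laden illustration: toggling AC via stac/clac is ring-0 only, so this just saves, inspects, and restores EFLAGS on x86-64 with GCC/Clang inline asm; AC is bit 18).

/* Userspace x86-64 sketch of the pushf/popf save-restore idiom used by
 * smap_save()/smap_restore() above. Not kernel code; it cannot and does
 * not execute stac/clac.
 */
#include <stdio.h>

static unsigned long flags_save(void)
{
	unsigned long flags;

	/* same pushf/pop idiom as smap_save(), minus the clac */
	__asm__ volatile ("pushf; pop %0" : "=r" (flags) : : "memory");
	return flags;
}

static void flags_restore(unsigned long flags)
{
	/* same push/popf idiom as smap_restore() */
	__asm__ volatile ("push %0; popf" : : "r" (flags) : "memory", "cc");
}

int main(void)
{
	unsigned long f = flags_save();

	/* EFLAGS.AC -- the bit SMAP keys off -- is bit 18 */
	printf("EFLAGS: %#lx, AC = %lu\n", f, (f >> 18) & 1);
	flags_restore(f);
	return 0;
}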
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Directory notifications for Linux.
 *
 * Copyright (C) 2000,2001,2002 Stephen Rothwell
 *
 * Copyright (C) 2009 Eric Paris <Red Hat Inc>
 * dnotify was largely rewritten to use the new fsnotify infrastructure
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/dnotify.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/fsnotify_backend.h>

static int dir_notify_enable __read_mostly = 1;

#ifdef CONFIG_SYSCTL
static const struct ctl_table dnotify_sysctls[] = {
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};
static void __init dnotify_sysctl_init(void)
{
	register_sysctl_init("fs", dnotify_sysctls);
}
#else
#define dnotify_sysctl_init() do { } while (0)
#endif

static struct kmem_cache *dnotify_struct_cache __ro_after_init;
static struct kmem_cache *dnotify_mark_cache __ro_after_init;
static struct fsnotify_group *dnotify_group __ro_after_init;

/*
 * dnotify will attach one of these to each inode (i_fsnotify_marks) which
 * is being watched by dnotify. If multiple userspace applications are
 * watching the same directory with dnotify their information is chained
 * in dn
 */
struct dnotify_mark {
	struct fsnotify_mark fsn_mark;
	struct dnotify_struct *dn;
};

/*
 * When a process starts or stops watching an inode the set of events which
 * dnotify cares about for that inode may change. This function runs the
 * list of everything receiving dnotify events about this directory and
 * calculates the set of all those events. After it updates what dnotify is
 * interested in it calls the fsnotify function so it can update the set of
 * all events relevant to this inode.
 */
static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
{
	__u32 new_mask = 0;
	struct dnotify_struct *dn;
	struct dnotify_mark *dn_mark = container_of(fsn_mark,
						    struct dnotify_mark,
						    fsn_mark);

	assert_spin_locked(&fsn_mark->lock);

	for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
	if (fsn_mark->mask == new_mask)
		return;
	fsn_mark->mask = new_mask;

	fsnotify_recalc_mask(fsn_mark->connector);
}

/*
 * Main fsnotify call where events are delivered to dnotify.
 * Find the dnotify mark on the relevant inode, run the list of dnotify
 * structs on that mark and determine which of them has expressed interest
 * in receiving events of this type. When found send the correct process
 * and signal and destroy the dnotify struct if it was not registered to
 * receive multiple events.
 */
static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	struct dnotify_mark *dn_mark;
	struct dnotify_struct *dn;
	struct dnotify_struct **prev;
	struct fown_struct *fown;
	__u32 test_mask = mask & ~FS_EVENT_ON_CHILD;

	/* not a dir, dnotify doesn't care */
	if (!dir && !(mask & FS_ISDIR))
		return 0;

	dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);

	spin_lock(&inode_mark->lock);
	prev = &dn_mark->dn;
	while ((dn = *prev) != NULL) {
		if ((dn->dn_mask & test_mask) == 0) {
			prev = &dn->dn_next;
			continue;
		}
		fown = file_f_owner(dn->dn_filp);
		send_sigio(fown, dn->dn_fd, POLL_MSG);
		if (dn->dn_mask & FS_DN_MULTISHOT)
			prev = &dn->dn_next;
		else {
			*prev = dn->dn_next;
			kmem_cache_free(dnotify_struct_cache, dn);
			dnotify_recalc_inode_mask(inode_mark);
		}
	}
	spin_unlock(&inode_mark->lock);

	return 0;
}

static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
{
	struct dnotify_mark *dn_mark = container_of(fsn_mark,
						    struct dnotify_mark,
						    fsn_mark);

	BUG_ON(dn_mark->dn);

	kmem_cache_free(dnotify_mark_cache, dn_mark);
}

static const struct fsnotify_ops dnotify_fsnotify_ops = {
	.handle_inode_event = dnotify_handle_event,
	.free_mark = dnotify_free_mark,
};

/*
 * Called every time a file is closed. Looks first for a dnotify mark on
 * the inode. If one is found run all of the ->dn structures attached to
 * that mark for one relevant to this process closing the file and remove
 * that dnotify_struct. If that was the last dnotify_struct also remove
 * the fsnotify_mark.
 */
void dnotify_flush(struct file *filp, fl_owner_t id)
{
	struct fsnotify_mark *fsn_mark;
	struct dnotify_mark *dn_mark;
	struct dnotify_struct *dn;
	struct dnotify_struct **prev;
	struct inode *inode;
	bool free = false;

	inode = file_inode(filp);
	if (!S_ISDIR(inode->i_mode))
		return;

	fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
	if (!fsn_mark)
		return;
	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);

	fsnotify_group_lock(dnotify_group);

	spin_lock(&fsn_mark->lock);
	prev = &dn_mark->dn;
	while ((dn = *prev) != NULL) {
		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
			*prev = dn->dn_next;
			kmem_cache_free(dnotify_struct_cache, dn);
			dnotify_recalc_inode_mask(fsn_mark);
			break;
		}
		prev = &dn->dn_next;
	}

	spin_unlock(&fsn_mark->lock);

	/* nothing else could have found us thanks to the dnotify_group's
	   mark_mutex */
	if (dn_mark->dn == NULL) {
		fsnotify_detach_mark(fsn_mark);
		free = true;
	}

	fsnotify_group_unlock(dnotify_group);

	if (free)
		fsnotify_free_mark(fsn_mark);
	fsnotify_put_mark(fsn_mark);
}

/* this conversion is done only at watch creation */
static __u32 convert_arg(unsigned int arg)
{
	__u32 new_mask = FS_EVENT_ON_CHILD;

	if (arg & DN_MULTISHOT)
		new_mask |= FS_DN_MULTISHOT;
	if (arg & DN_DELETE)
		new_mask |= (FS_DELETE | FS_MOVED_FROM);
	if (arg & DN_MODIFY)
		new_mask |= FS_MODIFY;
	if (arg & DN_ACCESS)
		new_mask |= FS_ACCESS;
	if (arg & DN_ATTRIB)
		new_mask |= FS_ATTRIB;
	if (arg & DN_RENAME)
		new_mask |= FS_RENAME;
	if (arg & DN_CREATE)
		new_mask |= (FS_CREATE | FS_MOVED_TO);

	return new_mask;
}

/*
 * If multiple processes watch the same inode with dnotify there is only one
 * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
 * onto that mark. This function either attaches the new dnotify_struct onto
 * that list, or it |= the mask onto an existing dnotify_struct.
 */
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
	struct dnotify_struct *odn;

	odn = dn_mark->dn;
	while (odn != NULL) {
		/* adding more events to existing dnotify_struct? */
		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
			odn->dn_fd = fd;
			odn->dn_mask |= mask;
			return -EEXIST;
		}
		odn = odn->dn_next;
	}

	dn->dn_mask = mask;
	dn->dn_fd = fd;
	dn->dn_filp = filp;
	dn->dn_owner = id;
	dn->dn_next = dn_mark->dn;
	dn_mark->dn = dn;

	return 0;
}

/*
 * When a process calls fcntl to attach a dnotify watch to a directory it
 * ends up here. Allocate both a mark for fsnotify to add and a
 * dnotify_struct to be attached to the fsnotify_mark.
 */
int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
	struct dnotify_mark *new_dn_mark, *dn_mark;
	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
	struct dnotify_struct *dn;
	struct inode *inode;
	fl_owner_t id = current->files;
	struct file *f = NULL;
	int destroy = 0, error = 0;
	__u32 mask;

	/* we use these to tell if we need to kfree */
	new_fsn_mark = NULL;
	dn = NULL;

	if (!dir_notify_enable) {
		error = -EINVAL;
		goto out_err;
	}

	/* a 0 mask means we are explicitly removing the watch */
	if ((arg & ~DN_MULTISHOT) == 0) {
		dnotify_flush(filp, id);
		error = 0;
		goto out_err;
	}

	/* dnotify only works on directories */
	inode = file_inode(filp);
	if (!S_ISDIR(inode->i_mode)) {
		error = -ENOTDIR;
		goto out_err;
	}

	/*
	 * convert the userspace DN_* "arg" to the internal FS_*
	 * defined in fsnotify
	 */
	mask = convert_arg(arg);

	error = security_path_notify(&filp->f_path, mask,
				     FSNOTIFY_OBJ_TYPE_INODE);
	if (error)
		goto out_err;

	/* expect most fcntl to add new rather than augment old */
	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
	if (!dn) {
		error = -ENOMEM;
		goto out_err;
	}

	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
	new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
	if (!new_dn_mark) {
		error = -ENOMEM;
		goto out_err;
	}

	error = file_f_owner_allocate(filp);
	if (error)
		goto out_err;

	/* set up the new_fsn_mark and new_dn_mark */
	new_fsn_mark = &new_dn_mark->fsn_mark;
	fsnotify_init_mark(new_fsn_mark, dnotify_group);
	new_fsn_mark->mask = mask;
	new_dn_mark->dn = NULL;

	/* this is needed to prevent the fcntl/close race described below */
	fsnotify_group_lock(dnotify_group);

	/* add the new_fsn_mark or find an old one. */
	fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
	if (fsn_mark) {
		dn_mark = container_of(fsn_mark, struct dnotify_mark,
				       fsn_mark);
		spin_lock(&fsn_mark->lock);
	} else {
		error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
		if (error) {
			fsnotify_group_unlock(dnotify_group);
			goto out_err;
		}
		spin_lock(&new_fsn_mark->lock);
		fsn_mark = new_fsn_mark;
		dn_mark = new_dn_mark;
		/* we used new_fsn_mark, so don't free it */
		new_fsn_mark = NULL;
	}

	f = fget_raw(fd);

	/* if (f != filp) means that we lost a race and another task/thread
	 * actually closed the fd we are still playing with before we grabbed
	 * the dnotify_group's mark_mutex and fsn_mark->lock. Since closing
	 * the fd is the only time we clean up the marks we need to get our
	 * mark off the list. */
	if (f != filp) {
		/* if we added ourselves, shoot ourselves, it's possible that
		 * the flush actually did shoot this fsn_mark. That's fine too
		 * since multiple calls to destroy_mark is perfectly safe, if
		 * we found a dn_mark already attached to the inode, just sod
		 * off silently as the flush at close time dealt with it.
		 */
		if (dn_mark == new_dn_mark)
			destroy = 1;
		error = 0;
		goto out;
	}

	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);

	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
	/* !error means that we attached the dn to the dn_mark, so don't
	 * free it */
	if (!error)
		dn = NULL;
	/* -EEXIST means that we didn't add this new dn and used an old one.
	 * that isn't an error (and the unused dn should be freed) */
	else if (error == -EEXIST)
		error = 0;

	dnotify_recalc_inode_mask(fsn_mark);
out:
	spin_unlock(&fsn_mark->lock);

	if (destroy)
		fsnotify_detach_mark(fsn_mark);
	fsnotify_group_unlock(dnotify_group);
	if (destroy)
		fsnotify_free_mark(fsn_mark);
	fsnotify_put_mark(fsn_mark);
out_err:
	if (new_fsn_mark)
		fsnotify_put_mark(new_fsn_mark);
	if (dn)
		kmem_cache_free(dnotify_struct_cache, dn);
	if (f)
		fput(f);
	return error;
}

static int __init dnotify_init(void)
{
	dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
					  SLAB_PANIC|SLAB_ACCOUNT);
	dnotify_mark_cache = KMEM_CACHE(dnotify_mark,
					SLAB_PANIC|SLAB_ACCOUNT);

	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0);
	if (IS_ERR(dnotify_group))
		panic("unable to allocate fsnotify group for dnotify\n");
	dnotify_sysctl_init();
	return 0;
}

module_init(dnotify_init)
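fcntl_dirnotify() above is the kernel side of the dnotify API; from userspace it is driven with fcntl(F_NOTIFY). A minimal runnable example that watches the current directory and blocks until a SIGIO arrives (glibc exposes F_NOTIFY and the DN_* flags under _GNU_SOURCE):

/* Userspace counterpart of the code above: install a multishot dnotify
 * watch on "." and wait for the default SIGIO delivery set up by
 * __f_setown() in fcntl_dirnotify().
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t got_event;

static void on_sigio(int sig)
{
	(void)sig;
	got_event = 1;
}

int main(void)
{
	int fd = open(".", O_RDONLY | O_DIRECTORY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	signal(SIGIO, on_sigio);

	/* ask for create/delete/rename events, rearming after each one */
	if (fcntl(fd, F_NOTIFY,
		  DN_CREATE | DN_DELETE | DN_RENAME | DN_MULTISHOT) < 0) {
		perror("fcntl(F_NOTIFY)");
		return 1;
	}
	while (!got_event)
		pause();
	puts("directory changed");
	return 0;
}

Passing a mask of 0 (modulo DN_MULTISHOT) takes the dnotify_flush() path above and removes the watch.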
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/sysv/inode.c
 *
 *  minix/inode.c
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  xenix/inode.c
 *  Copyright (C) 1992  Doug Evans
 *
 *  coh/inode.c
 *  Copyright (C) 1993  Pascal Haible, Bruno Haible
 *
 *  sysv/inode.c
 *  Copyright (C) 1993  Paul B. Monday
 *
 *  sysv/inode.c
 *  Copyright (C) 1993  Bruno Haible
 *  Copyright (C) 1997, 1998  Krzysztof G. Baranowski
 *
 *  This file contains code for allocating/freeing inodes and for
 *  read/writing the superblock.
 */

#include <linux/highuid.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
#include <linux/namei.h>
#include <asm/byteorder.h>
#include "sysv.h"

static int sysv_sync_fs(struct super_block *sb, int wait)
{
	struct sysv_sb_info *sbi = SYSV_SB(sb);
	u32 time = (u32)ktime_get_real_seconds(), old_time;

	mutex_lock(&sbi->s_lock);

	/*
	 * If we are going to write out the super block,
	 * then attach current time stamp.
	 * But if the filesystem was marked clean, keep it clean.
*/ old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); if (sbi->s_type == FSTYPE_SYSV4) { if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38u - old_time)) *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38u - time); *sbi->s_sb_time = cpu_to_fs32(sbi, time); mark_buffer_dirty(sbi->s_bh2); } mutex_unlock(&sbi->s_lock); return 0; } static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); sync_filesystem(sb); if (sbi->s_forced_ro) *flags |= SB_RDONLY; return 0; } static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); if (!sb_rdonly(sb)) { /* XXX ext2 also updates the state here */ mark_buffer_dirty(sbi->s_bh1); if (sbi->s_bh1 != sbi->s_bh2) mark_buffer_dirty(sbi->s_bh2); } brelse(sbi->s_bh1); if (sbi->s_bh1 != sbi->s_bh2) brelse(sbi->s_bh2); kfree(sbi); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct sysv_sb_info *sbi = SYSV_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = sb->s_magic; buf->f_bsize = sb->s_blocksize; buf->f_blocks = sbi->s_ndatazones; buf->f_bavail = buf->f_bfree = sysv_count_free_blocks(sb); buf->f_files = sbi->s_ninodes; buf->f_ffree = sysv_count_free_inodes(sb); buf->f_namelen = SYSV_NAMELEN; buf->f_fsid = u64_to_fsid(id); return 0; } /* * NXI <-> N0XI for PDP, XIN <-> XIN0 for le32, NIX <-> 0NIX for be32 */ static inline void read3byte(struct sysv_sb_info *sbi, unsigned char * from, unsigned char * to) { if (sbi->s_bytesex == BYTESEX_PDP) { to[0] = from[0]; to[1] = 0; to[2] = from[1]; to[3] = from[2]; } else if (sbi->s_bytesex == BYTESEX_LE) { to[0] = from[0]; to[1] = from[1]; to[2] = from[2]; to[3] = 0; } else { to[0] = 0; to[1] = from[0]; to[2] = from[1]; to[3] = from[2]; } } static inline void write3byte(struct sysv_sb_info *sbi, unsigned char * from, unsigned char * to) { if (sbi->s_bytesex == BYTESEX_PDP) { to[0] = from[0]; to[1] = from[2]; to[2] = from[3]; } else if (sbi->s_bytesex == BYTESEX_LE) { to[0] = from[0]; to[1] = from[1]; to[2] = from[2]; } else { to[0] = from[1]; to[1] = from[2]; to[2] = from[3]; } } static const struct inode_operations sysv_symlink_inode_operations = { .get_link = page_get_link, .getattr = sysv_getattr, }; void sysv_set_inode(struct inode *inode, dev_t rdev) { if (S_ISREG(inode->i_mode)) { inode->i_op = &sysv_file_inode_operations; inode->i_fop = &sysv_file_operations; inode->i_mapping->a_ops = &sysv_aops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &sysv_dir_inode_operations; inode->i_fop = &sysv_dir_operations; inode->i_mapping->a_ops = &sysv_aops; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &sysv_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &sysv_aops; } else init_special_inode(inode, inode->i_mode, rdev); } struct inode *sysv_iget(struct super_block *sb, unsigned int ino) { struct sysv_sb_info * sbi = SYSV_SB(sb); struct buffer_head * bh; struct sysv_inode * raw_inode; struct sysv_inode_info * si; struct inode *inode; unsigned int block; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %d is out of range\n", sb->s_id, ino); return ERR_PTR(-EIO); } inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; raw_inode = sysv_raw_inode(sb, ino, &bh); if (!raw_inode) { printk("Major problem: unable to read inode from dev %s\n", inode->i_sb->s_id); goto bad_inode; } /* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? 
*/ inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); i_uid_write(inode, (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid)); i_gid_write(inode, (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid)); set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink)); inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); inode_set_atime(inode, fs32_to_cpu(sbi, raw_inode->i_atime), 0); inode_set_mtime(inode, fs32_to_cpu(sbi, raw_inode->i_mtime), 0); inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->i_ctime), 0); inode->i_blocks = 0; si = SYSV_I(inode); for (block = 0; block < 10+1+1+1; block++) read3byte(sbi, &raw_inode->i_data[3*block], (u8 *)&si->i_data[block]); brelse(bh); si->i_dir_start_lookup = 0; if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) sysv_set_inode(inode, old_decode_dev(fs32_to_cpu(sbi, si->i_data[0]))); else sysv_set_inode(inode, 0); unlock_new_inode(inode); return inode; bad_inode: iget_failed(inode); return ERR_PTR(-EIO); } static int __sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); struct buffer_head * bh; struct sysv_inode * raw_inode; struct sysv_inode_info * si; unsigned int ino, block; int err = 0; ino = inode->i_ino; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %d is out of range\n", inode->i_sb->s_id, ino); return -EIO; } raw_inode = sysv_raw_inode(sb, ino, &bh); if (!raw_inode) { printk("unable to read i-node block\n"); return -EIO; } raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(i_uid_read(inode))); raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(i_gid_read(inode))); raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink); raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size); raw_inode->i_atime = cpu_to_fs32(sbi, inode_get_atime_sec(inode)); raw_inode->i_mtime = cpu_to_fs32(sbi, inode_get_mtime_sec(inode)); raw_inode->i_ctime = cpu_to_fs32(sbi, inode_get_ctime_sec(inode)); si = SYSV_I(inode); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) si->i_data[0] = cpu_to_fs32(sbi, old_encode_dev(inode->i_rdev)); for (block = 0; block < 10+1+1+1; block++) write3byte(sbi, (u8 *)&si->i_data[block], &raw_inode->i_data[3*block]); mark_buffer_dirty(bh); if (wait) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { printk ("IO error syncing sysv inode [%s:%08x]\n", sb->s_id, ino); err = -EIO; } } brelse(bh); return err; } int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) { return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } int sysv_sync_inode(struct inode *inode) { return __sysv_write_inode(inode, 1); } static void sysv_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); if (!inode->i_nlink) { inode->i_size = 0; sysv_truncate(inode); } invalidate_inode_buffers(inode); clear_inode(inode); if (!inode->i_nlink) sysv_free_inode(inode); } static struct kmem_cache *sysv_inode_cachep; static struct inode *sysv_alloc_inode(struct super_block *sb) { struct sysv_inode_info *si; si = alloc_inode_sb(sb, sysv_inode_cachep, GFP_KERNEL); if (!si) return NULL; return &si->vfs_inode; } static void sysv_free_in_core_inode(struct inode *inode) { kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; inode_init_once(&si->vfs_inode); } const struct super_operations sysv_sops = { .alloc_inode = sysv_alloc_inode, .free_inode = sysv_free_in_core_inode, .write_inode = sysv_write_inode, 
	.evict_inode	= sysv_evict_inode,
	.put_super	= sysv_put_super,
	.sync_fs	= sysv_sync_fs,
	.remount_fs	= sysv_remount,
	.statfs		= sysv_statfs,
};

int __init sysv_init_icache(void)
{
	sysv_inode_cachep = kmem_cache_create("sysv_inode_cache",
			sizeof(struct sysv_inode_info), 0,
			SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
			init_once);
	if (!sysv_inode_cachep)
		return -ENOMEM;
	return 0;
}

void sysv_destroy_icache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(sysv_inode_cachep);
}
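read3byte()/write3byte() above widen the filesystem's on-disk 3-byte block pointers to 4-byte host integers and back, placing the zero byte according to the byte sex. Below is a standalone sketch of the little-endian case only (the XIN <-> XIN0 transform named in the comment); the PDP and big-endian variants differ only in byte placement.

/* Standalone userspace sketch (assumption: little-endian layout only)
 * of the 3-byte <-> 32-bit conversion done by read3byte()/write3byte().
 */
#include <stdint.h>
#include <stdio.h>

/* XIN -> XIN0: widen an on-disk 3-byte LE block number to a host u32 */
static uint32_t read3byte_le(const uint8_t *from)
{
	return (uint32_t)from[0] | ((uint32_t)from[1] << 8) |
	       ((uint32_t)from[2] << 16);
}

/* XIN0 -> XIN: narrow back, dropping the (always zero) high byte */
static void write3byte_le(uint32_t blk, uint8_t *to)
{
	to[0] = blk & 0xff;
	to[1] = (blk >> 8) & 0xff;
	to[2] = (blk >> 16) & 0xff;
}

int main(void)
{
	uint8_t raw[3] = { 0x39, 0x05, 0x00 };  /* block 1337 on disk */
	uint32_t blk = read3byte_le(raw);
	uint8_t back[3];

	write3byte_le(blk, back);
	printf("block %u round-trips to %02x %02x %02x\n",
	       (unsigned)blk, back[0], back[1], back[2]);
	return 0;
}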
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) ST-Ericsson AB 2010
 * Authors:	Sjur Brendeland
 *		Daniel Martensson
 */

#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/sched.h>
#include <linux/sockios.h>
#include <linux/caif/if_caif.h>
#include <net/rtnetlink.h>
#include <net/caif/caif_layer.h>
#include <net/caif/cfpkt.h>
#include <net/caif/caif_dev.h>

/* GPRS PDP connections have an MTU of 1500 */
#define GPRS_PDP_MTU  1500
/* 5 sec. connect timeout */
#define CONNECT_TIMEOUT (5 * HZ)
#define CAIF_NET_DEFAULT_QUEUE_LEN 500
#define UNDEF_CONNID 0xffffffff

/* This list is protected by the rtnl lock. */
static LIST_HEAD(chnl_net_list);

MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol GPRS network device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("caif");

enum caif_states {
	CAIF_CONNECTED		= 1,
	CAIF_CONNECTING,
	CAIF_DISCONNECTED,
	CAIF_SHUTDOWN
};

struct chnl_net {
	struct cflayer chnl;
	struct caif_connect_request conn_req;
	struct list_head list_field;
	struct net_device *netdev;
	wait_queue_head_t netmgmt_wq;
	/* Flow status to remember and control the transmission.
*/ bool flowenabled; enum caif_states state; }; static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; struct chnl_net *priv; int pktlen; const u8 *ip_version; u8 buf; priv = container_of(layr, struct chnl_net, chnl); skb = (struct sk_buff *) cfpkt_tonative(pkt); /* Get length of CAIF packet. */ pktlen = skb->len; /* Pass some minimum information and * send the packet to the net stack. */ skb->dev = priv->netdev; /* check the version of IP */ ip_version = skb_header_pointer(skb, 0, 1, &buf); if (!ip_version) { kfree_skb(skb); return -EINVAL; } switch (*ip_version >> 4) { case 4: skb->protocol = htons(ETH_P_IP); break; case 6: skb->protocol = htons(ETH_P_IPV6); break; default: kfree_skb(skb); priv->netdev->stats.rx_errors++; return -EINVAL; } /* If we change the header in loop mode, the checksum is corrupted. */ if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb->ip_summed = CHECKSUM_NONE; netif_rx(skb); /* Update statistics. */ priv->netdev->stats.rx_packets++; priv->netdev->stats.rx_bytes += pktlen; return 0; } static int delete_device(struct chnl_net *dev) { ASSERT_RTNL(); if (dev->netdev) unregister_netdevice(dev->netdev); return 0; } static void close_work(struct work_struct *work) { struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); if (dev->state == CAIF_SHUTDOWN) dev_close(dev->netdev); } rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); static void chnl_hold(struct cflayer *lyr) { struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl); dev_hold(priv->netdev); } static void chnl_put(struct cflayer *lyr) { struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl); dev_put(priv->netdev); } static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, int phyid) { struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); pr_debug("NET flowctrl func called flow: %s\n", flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" : flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" : flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND"); switch (flow) { case CAIF_CTRLCMD_FLOW_OFF_IND: priv->flowenabled = false; netif_stop_queue(priv->netdev); break; case CAIF_CTRLCMD_DEINIT_RSP: priv->state = CAIF_DISCONNECTED; break; case CAIF_CTRLCMD_INIT_FAIL_RSP: priv->state = CAIF_DISCONNECTED; wake_up_interruptible(&priv->netmgmt_wq); break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: priv->state = CAIF_SHUTDOWN; netif_tx_disable(priv->netdev); schedule_work(&close_worker); break; case CAIF_CTRLCMD_FLOW_ON_IND: priv->flowenabled = true; netif_wake_queue(priv->netdev); break; case CAIF_CTRLCMD_INIT_RSP: caif_client_register_refcnt(&priv->chnl, chnl_hold, chnl_put); priv->state = CAIF_CONNECTED; priv->flowenabled = true; netif_wake_queue(priv->netdev); wake_up_interruptible(&priv->netmgmt_wq); break; default: break; } } static netdev_tx_t chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct chnl_net *priv; struct cfpkt *pkt = NULL; int len; int result = -1; /* Get our private data. 
*/ priv = netdev_priv(dev); if (skb->len > priv->netdev->mtu) { pr_warn("Size of skb exceeded MTU\n"); kfree_skb(skb); dev->stats.tx_errors++; return NETDEV_TX_OK; } if (!priv->flowenabled) { pr_debug("dropping packets flow off\n"); kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); /* Store original SKB length. */ len = skb->len; pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb); /* Send the packet down the stack. */ result = priv->chnl.dn->transmit(priv->chnl.dn, pkt); if (result) { dev->stats.tx_dropped++; return NETDEV_TX_OK; } /* Update statistics. */ dev->stats.tx_packets++; dev->stats.tx_bytes += len; return NETDEV_TX_OK; } static int chnl_net_open(struct net_device *dev) { struct chnl_net *priv = NULL; int result = -1; int llifindex, headroom, tailroom, mtu; struct net_device *lldev; ASSERT_RTNL(); priv = netdev_priv(dev); if (!priv) { pr_debug("chnl_net_open: no priv\n"); return -ENODEV; } if (priv->state != CAIF_CONNECTING) { priv->state = CAIF_CONNECTING; result = caif_connect_client(dev_net(dev), &priv->conn_req, &priv->chnl, &llifindex, &headroom, &tailroom); if (result != 0) { pr_debug("err: " "Unable to register and open device," " Err:%d\n", result); goto error; } lldev = __dev_get_by_index(dev_net(dev), llifindex); if (lldev == NULL) { pr_debug("no interface?\n"); result = -ENODEV; goto error; } dev->needed_tailroom = tailroom + lldev->needed_tailroom; dev->hard_header_len = headroom + lldev->hard_header_len + lldev->needed_tailroom; /* * MTU, head-room etc is not know before we have a * CAIF link layer device available. MTU calculation may * override initial RTNL configuration. * MTU is minimum of current mtu, link layer mtu pluss * CAIF head and tail, and PDP GPRS contexts max MTU. 
*/ mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom)); mtu = min_t(int, GPRS_PDP_MTU, mtu); dev_set_mtu(dev, mtu); if (mtu < 100) { pr_warn("CAIF Interface MTU too small (%d)\n", mtu); result = -ENODEV; goto error; } } rtnl_unlock(); /* Release RTNL lock during connect wait */ result = wait_event_interruptible_timeout(priv->netmgmt_wq, priv->state != CAIF_CONNECTING, CONNECT_TIMEOUT); rtnl_lock(); if (result == -ERESTARTSYS) { pr_debug("wait_event_interruptible woken by a signal\n"); result = -ERESTARTSYS; goto error; } if (result == 0) { pr_debug("connect timeout\n"); result = -ETIMEDOUT; goto error; } if (priv->state != CAIF_CONNECTED) { pr_debug("connect failed\n"); result = -ECONNREFUSED; goto error; } pr_debug("CAIF Netdevice connected\n"); return 0; error: caif_disconnect_client(dev_net(dev), &priv->chnl); priv->state = CAIF_DISCONNECTED; pr_debug("state disconnected\n"); return result; } static int chnl_net_stop(struct net_device *dev) { struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); priv->state = CAIF_DISCONNECTED; caif_disconnect_client(dev_net(dev), &priv->chnl); return 0; } static int chnl_net_init(struct net_device *dev) { struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); INIT_LIST_HEAD(&priv->list_field); return 0; } static void chnl_net_uninit(struct net_device *dev) { struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); list_del_init(&priv->list_field); } static const struct net_device_ops netdev_ops = { .ndo_open = chnl_net_open, .ndo_stop = chnl_net_stop, .ndo_init = chnl_net_init, .ndo_uninit = chnl_net_uninit, .ndo_start_xmit = chnl_net_start_xmit, }; static void chnl_net_destructor(struct net_device *dev) { struct chnl_net *priv = netdev_priv(dev); caif_free_client(&priv->chnl); } static void ipcaif_net_setup(struct net_device *dev) { struct chnl_net *priv; dev->netdev_ops = &netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = chnl_net_destructor; dev->flags |= IFF_NOARP; dev->flags |= IFF_POINTOPOINT; dev->mtu = GPRS_PDP_MTU; dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; priv = netdev_priv(dev); priv->chnl.receive = chnl_recv_cb; priv->chnl.ctrlcmd = chnl_flowctrl_cb; priv->netdev = dev; priv->conn_req.protocol = CAIFPROTO_DATAGRAM; priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; priv->conn_req.priority = CAIF_PRIO_LOW; /* Insert illegal value */ priv->conn_req.sockaddr.u.dgm.connection_id = UNDEF_CONNID; priv->flowenabled = false; init_waitqueue_head(&priv->netmgmt_wq); } static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct chnl_net *priv; u8 loop; priv = netdev_priv(dev); if (nla_put_u32(skb, IFLA_CAIF_IPV4_CONNID, priv->conn_req.sockaddr.u.dgm.connection_id) || nla_put_u32(skb, IFLA_CAIF_IPV6_CONNID, priv->conn_req.sockaddr.u.dgm.connection_id)) goto nla_put_failure; loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP; if (nla_put_u8(skb, IFLA_CAIF_LOOPBACK, loop)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static void caif_netlink_parms(struct nlattr *data[], struct caif_connect_request *conn_req) { if (!data) { pr_warn("no params data found\n"); return; } if (data[IFLA_CAIF_IPV4_CONNID]) conn_req->sockaddr.u.dgm.connection_id = nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]); if (data[IFLA_CAIF_IPV6_CONNID]) conn_req->sockaddr.u.dgm.connection_id = nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]); if (data[IFLA_CAIF_LOOPBACK]) { if (nla_get_u8(data[IFLA_CAIF_LOOPBACK])) conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP; else conn_req->protocol = 
CAIFPROTO_DATAGRAM; } } static int ipcaif_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { int ret; struct chnl_net *caifdev; ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); ret = register_netdevice(dev); if (ret) pr_warn("device rtml registration failed\n"); else list_add(&caifdev->list_field, &chnl_net_list); /* Use ifindex as connection id, and use loopback channel default. */ if (caifdev->conn_req.sockaddr.u.dgm.connection_id == UNDEF_CONNID) { caifdev->conn_req.sockaddr.u.dgm.connection_id = dev->ifindex; caifdev->conn_req.protocol = CAIFPROTO_DATAGRAM_LOOP; } return ret; } static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct chnl_net *caifdev; ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); netdev_state_change(dev); return 0; } static size_t ipcaif_get_size(const struct net_device *dev) { return /* IFLA_CAIF_IPV4_CONNID */ nla_total_size(4) + /* IFLA_CAIF_IPV6_CONNID */ nla_total_size(4) + /* IFLA_CAIF_LOOPBACK */ nla_total_size(2) + 0; } static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = { [IFLA_CAIF_IPV4_CONNID] = { .type = NLA_U32 }, [IFLA_CAIF_IPV6_CONNID] = { .type = NLA_U32 }, [IFLA_CAIF_LOOPBACK] = { .type = NLA_U8 } }; static struct rtnl_link_ops ipcaif_link_ops __read_mostly = { .kind = "caif", .priv_size = sizeof(struct chnl_net), .setup = ipcaif_net_setup, .maxtype = IFLA_CAIF_MAX, .policy = ipcaif_policy, .newlink = ipcaif_newlink, .changelink = ipcaif_changelink, .get_size = ipcaif_get_size, .fill_info = ipcaif_fill_info, }; static int __init chnl_init_module(void) { return rtnl_link_register(&ipcaif_link_ops); } static void __exit chnl_exit_module(void) { struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; rtnl_link_unregister(&ipcaif_link_ops); rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); list_del_init(list_node); delete_device(dev); } rtnl_unlock(); } module_init(chnl_init_module); module_exit(chnl_exit_module);
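chnl_net_open() clamps the device MTU to the minimum of the configured MTU, the link-layer MTU less the CAIF head and tail room, and the GPRS PDP ceiling. A standalone sketch of just that computation (userspace C; the example numbers are invented for illustration):

/* Standalone sketch of the MTU clamping in chnl_net_open() above;
 * not kernel code, and the inputs in main() are hypothetical.
 */
#include <stdio.h>

#define GPRS_PDP_MTU 1500

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

static int caif_clamp_mtu(int dev_mtu, int lldev_mtu,
			  int headroom, int tailroom)
{
	/* fit inside the link layer after CAIF head/tail overhead ... */
	int mtu = min_int(dev_mtu, lldev_mtu - (headroom + tailroom));

	/* ... and never exceed the GPRS PDP context maximum */
	return min_int(GPRS_PDP_MTU, mtu);
}

int main(void)
{
	/* e.g. 16k configured, 1500 link MTU, 32+16 bytes CAIF overhead */
	printf("mtu = %d\n", caif_clamp_mtu(16384, 1500, 32, 16));
	return 0;
}

The kernel code additionally rejects the result if it falls below 100 bytes, since such a link would be unusable for IP.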
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) B.A.T.M.A.N. contributors:
 *
 * Marek Lindner, Simon Wunderlich
 */

#include "soft-interface.h"
#include "main.h"

#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/percpu.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>

#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "hard-interface.h"
#include "multicast.h"
#include "network-coding.h"
#include "send.h"
#include "translation-table.h"

/**
 * batadv_skb_head_push() - Increase header size and move (push) head pointer
 * @skb: packet buffer which should be modified
 * @len: number of bytes to add
 *
 * Return: 0 on success or negative error number in case of failure
 */
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
{
	int result;

	/* TODO: We must check if we can release all references to non-payload
	 * data using __skb_header_release in our skbs to allow skb_cow_header
	 * to work optimally. This means that those skbs are not allowed to
	 * read or write any data which is before the current position of
	 * skb->data after that call and thus allow other skbs with the same
	 * data buffer to write freely in that area.
	 */
	result = skb_cow_head(skb, len);
	if (result < 0)
		return result;

	skb_push(skb, len);
	return 0;
}

static int batadv_interface_open(struct net_device *dev)
{
	netif_start_queue(dev);
	return 0;
}

static int batadv_interface_release(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

/**
 * batadv_sum_counter() - Sum the cpu-local counters for index 'idx'
 * @bat_priv: the bat priv with all the soft interface information
 * @idx: index of counter to sum up
 *
 * Return: sum of all cpu-local counters
 */
static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
{
	u64 *counters, sum = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
		sum += counters[idx];
	}

	return sum;
}

static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
{
	struct batadv_priv *bat_priv = netdev_priv(dev);
	struct net_device_stats *stats = &dev->stats;

	stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX);
	stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES);
	stats->tx_dropped = batadv_sum_counter(bat_priv, BATADV_CNT_TX_DROPPED);
	stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX);
	stats->rx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES);
	return stats;
}

static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
{
	struct batadv_priv *bat_priv = netdev_priv(dev);
	struct batadv_softif_vlan *vlan;
	struct sockaddr *addr = p;
	u8 old_addr[ETH_ALEN];

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	ether_addr_copy(old_addr, dev->dev_addr);
	eth_hw_addr_set(dev, addr->sa_data);

	/* only modify transtable if it has been initialized before */
	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
		return 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
		batadv_tt_local_remove(bat_priv, old_addr, vlan->vid,
				       "mac address changed", false);
		batadv_tt_local_add(dev, addr->sa_data, vlan->vid,
				    BATADV_NULL_IFINDEX, BATADV_NO_MARK);
	}
	rcu_read_unlock();

	return 0;
}

static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
	struct batadv_priv *bat_priv = netdev_priv(dev);

	/* check ranges */
	if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
		return -EINVAL;

	WRITE_ONCE(dev->mtu, new_mtu);
	bat_priv->mtu_set_by_user = new_mtu;

	return 0;
}

/**
 * batadv_interface_set_rx_mode() - set the rx mode of a device
 * @dev: registered network device to modify
 *
 * We do not actually need to set any rx filters for the virtual batman
 * soft interface. However a dummy handler enables a user to set static
 * multicast listeners for instance.
 */
static void batadv_interface_set_rx_mode(struct net_device *dev)
{
}

static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
				       struct net_device *soft_iface)
{
	struct ethhdr *ethhdr;
	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
	struct batadv_hard_iface *primary_if = NULL;
	struct batadv_bcast_packet *bcast_packet;
	static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
					      0x00, 0x00};
	static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
					       0x00, 0x00};
	enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO;
	u8 *dst_hint = NULL, chaddr[ETH_ALEN];
	struct vlan_ethhdr *vhdr;
	unsigned int header_len = 0;
	int data_len = skb->len, ret;
	unsigned long brd_delay = 0;
	bool do_bcast = false, client_added;
	unsigned short vid;
	u32 seqno;
	int gw_mode;
	enum batadv_forw_mode forw_mode = BATADV_FORW_BCAST;
	int mcast_is_routable = 0;
	int network_offset = ETH_HLEN;
	__be16 proto;

	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
		goto dropped;

	/* reset control block to avoid left overs from previous users */
	memset(skb->cb, 0, sizeof(struct batadv_skb_cb));

	netif_trans_update(soft_iface);
	vid = batadv_get_vid(skb, 0);

	skb_reset_mac_header(skb);
	ethhdr = eth_hdr(skb);

	proto = ethhdr->h_proto;

	switch (ntohs(proto)) {
	case ETH_P_8021Q:
		if (!pskb_may_pull(skb, sizeof(*vhdr)))
			goto dropped;
		vhdr = vlan_eth_hdr(skb);
		proto = vhdr->h_vlan_encapsulated_proto;

		/* drop batman-in-batman packets to prevent loops */
		if (proto != htons(ETH_P_BATMAN)) {
			network_offset += VLAN_HLEN;
			break;
		}

		fallthrough;
	case ETH_P_BATMAN:
		goto dropped;
	}

	skb_set_network_header(skb, network_offset);

	if (batadv_bla_tx(bat_priv, skb, vid))
		goto dropped;

	/* skb->data might have been reallocated by batadv_bla_tx() */
	ethhdr = eth_hdr(skb);

	/* Register the client MAC in the transtable */
	if (!is_multicast_ether_addr(ethhdr->h_source) &&
	    !batadv_bla_is_loopdetect_mac(ethhdr->h_source)) {
		client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source,
						   vid, skb->skb_iif,
						   skb->mark);
		if (!client_added)
			goto dropped;
	}

	/* Snoop address candidates from DHCPACKs for early DAT filling */
	batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid);

	/* don't accept stp packets. STP does not help in meshes.
	 * better use the bridge loop avoidance ...
	 *
	 * The same goes for ECTP sent at least by some Cisco Switches,
	 * it might confuse the mesh when used with bridge loop avoidance.
	 */
	if (batadv_compare_eth(ethhdr->h_dest, stp_addr))
		goto dropped;

	if (batadv_compare_eth(ethhdr->h_dest, ectp_addr))
		goto dropped;

	gw_mode = atomic_read(&bat_priv->gw.mode);
	if (is_multicast_ether_addr(ethhdr->h_dest)) {
		/* if gw mode is off, broadcast every packet */
		if (gw_mode == BATADV_GW_MODE_OFF) {
			do_bcast = true;
			goto send;
		}

		dhcp_rcp = batadv_gw_dhcp_recipient_get(skb, &header_len,
							chaddr);
		/* skb->data may have been modified by
		 * batadv_gw_dhcp_recipient_get()
		 */
		ethhdr = eth_hdr(skb);
		/* if gw_mode is on, broadcast any non-DHCP message.
		 * All the DHCP packets are going to be sent as unicast
		 */
		if (dhcp_rcp == BATADV_DHCP_NO) {
			do_bcast = true;
			goto send;
		}

		if (dhcp_rcp == BATADV_DHCP_TO_CLIENT)
			dst_hint = chaddr;
		else if ((gw_mode == BATADV_GW_MODE_SERVER) &&
			 (dhcp_rcp == BATADV_DHCP_TO_SERVER))
			/* gateways should not forward any DHCP message if
			 * directed to a DHCP server
			 */
			goto dropped;

send:
		if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
			forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid,
							   &mcast_is_routable);
			switch (forw_mode) {
			case BATADV_FORW_BCAST:
				break;
			case BATADV_FORW_UCASTS:
			case BATADV_FORW_MCAST:
				do_bcast = false;
				break;
			case BATADV_FORW_NONE:
				fallthrough;
			default:
				goto dropped;
			}
		}
	}

	batadv_skb_set_priority(skb, 0);

	/* ethernet packet should be broadcasted */
	if (do_bcast) {
		primary_if = batadv_primary_if_get_selected(bat_priv);
		if (!primary_if)
			goto dropped;

		/* in case of ARP request, we do not immediately broadcast the
		 * packet, instead we first wait for DAT to try to retrieve the
		 * correct ARP entry
		 */
		if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb))
			brd_delay = msecs_to_jiffies(ARP_REQ_DELAY);

		if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0)
			goto dropped;

		bcast_packet = (struct batadv_bcast_packet *)skb->data;
		bcast_packet->version = BATADV_COMPAT_VERSION;
		bcast_packet->ttl = BATADV_TTL - 1;

		/* batman packet type: broadcast */
		bcast_packet->packet_type = BATADV_BCAST;
		bcast_packet->reserved = 0;

		/* hw address of first interface is the orig mac because only
		 * this mac is known throughout the mesh
		 */
		ether_addr_copy(bcast_packet->orig,
				primary_if->net_dev->dev_addr);

		/* set broadcast sequence number */
		seqno = atomic_inc_return(&bat_priv->bcast_seqno);
		bcast_packet->seqno = htonl(seqno);

		batadv_send_bcast_packet(bat_priv, skb, brd_delay, true);
	/* unicast packet */
	} else {
		/* DHCP packets going to a server will use the GW feature */
		if (dhcp_rcp == BATADV_DHCP_TO_SERVER) {
			ret = batadv_gw_out_of_range(bat_priv, skb);
			if (ret)
				goto dropped;
			ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
		} else if (forw_mode == BATADV_FORW_UCASTS) {
			ret = batadv_mcast_forw_send(bat_priv, skb, vid,
						     mcast_is_routable);
		} else if (forw_mode == BATADV_FORW_MCAST) {
			ret = batadv_mcast_forw_mcsend(bat_priv, skb);
		} else {
			if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
								  skb))
				goto dropped;

			batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb);

			ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint,
						     vid);
		}
		if (ret != NET_XMIT_SUCCESS)
			goto dropped_freed;
	}

	batadv_inc_counter(bat_priv, BATADV_CNT_TX);
	batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, data_len);
	goto end;

dropped:
	kfree_skb(skb);
dropped_freed:
	batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
	batadv_hardif_put(primary_if);
	return NETDEV_TX_OK;
}

/**
 * batadv_interface_rx() - receive ethernet frame on local batman-adv interface
 * @soft_iface: local interface which will receive the ethernet frame
 * @skb: ethernet frame for @soft_iface
 * @hdr_size: size of already parsed batman-adv header
 * @orig_node: originator from which the batman-adv packet was sent
 *
 * Sends an ethernet frame to the receive path of the local @soft_iface.
 * skb->data still points to the batman-adv header with the size @hdr_size.
 * The caller has to have parsed this header already and made sure that at
 * least @hdr_size bytes are still available for pull in @skb.
 *
 * The packet may still get dropped. This can happen when the encapsulated
 * ethernet frame is invalid or contains again a batman-adv packet. Also
 * unicast packets will be dropped directly when sent between two
 * isolated clients.
 */
void batadv_interface_rx(struct net_device *soft_iface,
			 struct sk_buff *skb, int hdr_size,
			 struct batadv_orig_node *orig_node)
{
	struct batadv_bcast_packet *batadv_bcast_packet;
	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
	struct vlan_ethhdr *vhdr;
	struct ethhdr *ethhdr;
	unsigned short vid;
	int packet_type;

	batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data;
	packet_type = batadv_bcast_packet->packet_type;

	skb_pull_rcsum(skb, hdr_size);
	skb_reset_mac_header(skb);

	/* clean the netfilter state now that the batman-adv header has been
	 * removed
	 */
	nf_reset_ct(skb);

	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
		goto dropped;

	vid = batadv_get_vid(skb, 0);
	ethhdr = eth_hdr(skb);

	switch (ntohs(ethhdr->h_proto)) {
	case ETH_P_8021Q:
		if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
			goto dropped;

		vhdr = skb_vlan_eth_hdr(skb);

		/* drop batman-in-batman packets to prevent loops */
		if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN))
			break;

		fallthrough;
	case ETH_P_BATMAN:
		goto dropped;
	}

	/* skb->dev & skb->pkt_type are set here */
	skb->protocol = eth_type_trans(skb, soft_iface);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	batadv_inc_counter(bat_priv, BATADV_CNT_RX);
	batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
			   skb->len + ETH_HLEN);

	/* Let the bridge loop avoidance check the packet. If it will
	 * not handle it, we can safely push it up.
	 */
	if (batadv_bla_rx(bat_priv, skb, vid, packet_type))
		goto out;

	if (orig_node)
		batadv_tt_add_temporary_global_entry(bat_priv, orig_node,
						     ethhdr->h_source, vid);

	if (is_multicast_ether_addr(ethhdr->h_dest)) {
		/* set the mark on broadcast packets if AP isolation is ON and
		 * the packet is coming from an "isolated" client
		 */
		if (batadv_vlan_ap_isola_get(bat_priv, vid) &&
		    batadv_tt_global_is_isolated(bat_priv, ethhdr->h_source,
						 vid)) {
			/* save bits in skb->mark not covered by the mask and
			 * apply the mark on the rest
			 */
			skb->mark &= ~bat_priv->isolation_mark_mask;
			skb->mark |= bat_priv->isolation_mark;
		}
	} else if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source,
					 ethhdr->h_dest, vid)) {
		goto dropped;
	}

	netif_rx(skb);
	goto out;

dropped:
	kfree_skb(skb);
out:
	return;
}

/**
 * batadv_softif_vlan_release() - release vlan from lists and queue for free
 *  after rcu grace period
 * @ref: kref pointer of the vlan object
 */
void batadv_softif_vlan_release(struct kref *ref)
{
	struct batadv_softif_vlan *vlan;

	vlan = container_of(ref, struct batadv_softif_vlan, refcount);

	spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
	hlist_del_rcu(&vlan->list);
	spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);

	kfree_rcu(vlan, rcu);
}

/**
 * batadv_softif_vlan_get() - get the vlan object for a specific vid
 * @bat_priv: the bat priv with all the soft interface information
 * @vid: the identifier of the vlan object to retrieve
 *
 * Return: the private data of the vlan matching the vid passed as argument or
 * NULL otherwise. The refcounter of the returned object is incremented by 1.
*/ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_softif_vlan *vlan_tmp, *vlan = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) { if (vlan_tmp->vid != vid) continue; if (!kref_get_unless_zero(&vlan_tmp->refcount)) continue; vlan = vlan_tmp; break; } rcu_read_unlock(); return vlan; } /** * batadv_softif_create_vlan() - allocate the needed resources for a new vlan * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier * * Return: 0 on success, a negative error otherwise. */ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_softif_vlan *vlan; spin_lock_bh(&bat_priv->softif_vlan_list_lock); vlan = batadv_softif_vlan_get(bat_priv, vid); if (vlan) { batadv_softif_vlan_put(vlan); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); if (!vlan) { spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -ENOMEM; } vlan->bat_priv = bat_priv; vlan->vid = vid; kref_init(&vlan->refcount); atomic_set(&vlan->ap_isolation, 0); kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ batadv_tt_local_add(bat_priv->soft_iface, bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); /* don't return reference to new softif_vlan */ batadv_softif_vlan_put(vlan); return 0; } /** * batadv_softif_destroy_vlan() - remove and destroy a softif_vlan object * @bat_priv: the bat priv with all the soft interface information * @vlan: the object to remove */ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan) { /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); batadv_softif_vlan_put(vlan); } /** * batadv_interface_add_vid() - ndo_add_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the vlan id * @vid: identifier of the new vlan * * Set up all the internal structures for handling the new vlan on top of the * mesh interface * * Return: 0 on success or a negative error code in case of failure. */ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_softif_vlan *vlan; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types */ if (proto != htons(ETH_P_8021Q)) return -EINVAL; /* VID 0 is only used to indicate "priority tag" frames which only * contain priority information and no VID. No management structures * should be created for this VID and it should be handled like an * untagged frame. */ if (vid == 0) return 0; vid |= BATADV_VLAN_HAS_TAG; /* if a new vlan is getting created and it already exists, it means that * it was not deleted yet. batadv_softif_vlan_get() increases the * refcount in order to revive the object. * * if it does not exist then create it. */ vlan = batadv_softif_vlan_get(bat_priv, vid); if (!vlan) return batadv_softif_create_vlan(bat_priv, vid); /* add a new TT local entry. This one will be marked with the NOPURGE * flag. 
This must be added again, even if the vlan object already * exists, because the entry was deleted by kill_vid() */ batadv_tt_local_add(bat_priv->soft_iface, bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); return 0; } /** * batadv_interface_kill_vid() - ndo_kill_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the vlan id * @vid: identifier of the deleted vlan * * Destroy all the internal structures used to handle the vlan identified by vid * on top of the mesh interface * * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q * or -ENOENT if the specified vlan id wasn't registered. */ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_softif_vlan *vlan; /* only 802.1Q vlans are supported. batman-adv does not know how to * handle other types */ if (proto != htons(ETH_P_8021Q)) return -EINVAL; /* "priority tag" frames are handled like "untagged" frames * and no softif_vlan needs to be destroyed */ if (vid == 0) return 0; vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return -ENOENT; batadv_softif_destroy_vlan(bat_priv, vlan); /* finally free the vlan object */ batadv_softif_vlan_put(vlan); return 0; } /* batman-adv network devices have devices nesting below it and are a special * "super class" of normal network devices; split their locks off into a * separate class since they always nest. */ static struct lock_class_key batadv_netdev_xmit_lock_key; static struct lock_class_key batadv_netdev_addr_lock_key; /** * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue * @dev: device which owns the tx queue * @txq: tx queue to modify * @_unused: always NULL */ static void batadv_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) { lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); } /** * batadv_set_lockdep_class() - Set txq and addr_list lockdep class * @dev: network device to modify */ static void batadv_set_lockdep_class(struct net_device *dev) { lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } /** * batadv_softif_init_late() - late stage initialization of soft interface * @dev: registered network device to modify * * Return: error code on failures */ static int batadv_softif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; u32 random_seqno; int ret; size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; batadv_set_lockdep_class(dev); bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called */ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(u64)); if (!bat_priv->bat_counters) return -ENOMEM; atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bridge_loop_avoidance, 1); #endif #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif #ifdef CONFIG_BATMAN_ADV_MCAST atomic_set(&bat_priv->multicast_mode, 1); atomic_set(&bat_priv->multicast_fanout, 16); atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0); #endif 
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); #endif atomic_set(&bat_priv->fragmentation, 1); atomic_set(&bat_priv->packet_size_max, ETH_DATA_LEN); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); atomic_set(&bat_priv->tt.vn, 0); atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); #endif atomic_set(&bat_priv->tp_num, 0); WRITE_ONCE(bat_priv->tt.local_changes, 0); bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; bat_priv->isolation_mark = 0; bat_priv->isolation_mark_mask = 0; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&bat_priv->frag_seqno, random_seqno); bat_priv->primary_if = NULL; batadv_nc_init_bat_priv(bat_priv); if (!bat_priv->algo_ops) { ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) goto free_bat_counters; } ret = batadv_mesh_init(dev); if (ret < 0) goto free_bat_counters; return 0; free_bat_counters: free_percpu(bat_priv->bat_counters); bat_priv->bat_counters = NULL; return ret; } /** * batadv_softif_slave_add() - Add a slave interface to a batadv_soft_interface * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should become the slave interface * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_softif_slave_add(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) { struct batadv_hard_iface *hard_iface; int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->soft_iface) goto out; ret = batadv_hardif_enable_interface(hard_iface, dev); out: batadv_hardif_put(hard_iface); return ret; } /** * batadv_softif_slave_del() - Delete a slave iface from a batadv_soft_interface * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * * Return: 0 if successful or error otherwise. 
*/ static int batadv_softif_slave_del(struct net_device *dev, struct net_device *slave_dev) { struct batadv_hard_iface *hard_iface; int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->soft_iface != dev) goto out; batadv_hardif_disable_interface(hard_iface); ret = 0; out: batadv_hardif_put(hard_iface); return ret; } static const struct net_device_ops batadv_netdev_ops = { .ndo_init = batadv_softif_init_late, .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, .ndo_vlan_rx_add_vid = batadv_interface_add_vid, .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid, .ndo_set_mac_address = batadv_interface_set_mac_addr, .ndo_change_mtu = batadv_interface_change_mtu, .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, .ndo_add_slave = batadv_softif_slave_add, .ndo_del_slave = batadv_softif_slave_del, }; static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strscpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); strscpy(info->bus_info, "batman", sizeof(info->bus_info)); } /* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 * Declare each description string in struct.name[] to get fixed sized buffer * and compile time checking for strings longer than ETH_GSTRING_LEN. */ static const struct { const char name[ETH_GSTRING_LEN]; } batadv_counters_strings[] = { { "tx" }, { "tx_bytes" }, { "tx_dropped" }, { "rx" }, { "rx_bytes" }, { "forward" }, { "forward_bytes" }, { "mgmt_tx" }, { "mgmt_tx_bytes" }, { "mgmt_rx" }, { "mgmt_rx_bytes" }, { "frag_tx" }, { "frag_tx_bytes" }, { "frag_rx" }, { "frag_rx_bytes" }, { "frag_fwd" }, { "frag_fwd_bytes" }, { "tt_request_tx" }, { "tt_request_rx" }, { "tt_response_tx" }, { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, #ifdef CONFIG_BATMAN_ADV_MCAST { "mcast_tx" }, { "mcast_tx_bytes" }, { "mcast_tx_local" }, { "mcast_tx_local_bytes" }, { "mcast_rx" }, { "mcast_rx_bytes" }, { "mcast_rx_local" }, { "mcast_rx_local_bytes" }, { "mcast_fwd" }, { "mcast_fwd_bytes" }, #endif #ifdef CONFIG_BATMAN_ADV_DAT { "dat_get_tx" }, { "dat_get_rx" }, { "dat_put_tx" }, { "dat_put_rx" }, { "dat_cached_reply_tx" }, #endif #ifdef CONFIG_BATMAN_ADV_NC { "nc_code" }, { "nc_code_bytes" }, { "nc_recode" }, { "nc_recode_bytes" }, { "nc_buffer" }, { "nc_decode" }, { "nc_decode_bytes" }, { "nc_decode_failed" }, { "nc_sniffed" }, #endif }; static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) { if (stringset == ETH_SS_STATS) memcpy(data, batadv_counters_strings, sizeof(batadv_counters_strings)); } static void batadv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct batadv_priv *bat_priv = netdev_priv(dev); int i; for (i = 0; i < BATADV_CNT_NUM; i++) data[i] = batadv_sum_counter(bat_priv, i); } static int batadv_get_sset_count(struct net_device *dev, int stringset) { if (stringset == ETH_SS_STATS) return BATADV_CNT_NUM; return -EOPNOTSUPP; } static const struct ethtool_ops batadv_ethtool_ops = { .get_drvinfo = batadv_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = batadv_get_strings, .get_ethtool_stats = batadv_get_ethtool_stats, .get_sset_count = batadv_get_sset_count, }; /** * batadv_softif_free() - Deconstructor of batadv_soft_interface * 
@dev: Device to cleanup and remove */ static void batadv_softif_free(struct net_device *dev) { batadv_mesh_free(dev); /* some scheduled RCU callbacks need the bat_priv struct to accomplish * their tasks. Wait for them all to be finished before freeing the * netdev and its private data (bat_priv) */ rcu_barrier(); } /** * batadv_softif_init_early() - early stage initialization of soft interface * @dev: registered network device to modify */ static void batadv_softif_init_early(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = batadv_softif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; dev->netns_local = true; /* can't call min_mtu, because the needed variables * have not been initialized yet */ dev->mtu = ETH_DATA_LEN; /* generate random address */ eth_hw_addr_random(dev); dev->ethtool_ops = &batadv_ethtool_ops; } /** * batadv_softif_validate() - validate configuration of new batadv link * @tb: IFLA_INFO_DATA netlink attributes * @data: enum batadv_ifla_attrs attributes * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct batadv_algo_ops *algo_ops; if (!data) return 0; if (data[IFLA_BATADV_ALGO_NAME]) { algo_ops = batadv_algo_get(nla_data(data[IFLA_BATADV_ALGO_NAME])); if (!algo_ops) return -EINVAL; } return 0; } /** * batadv_softif_newlink() - pre-initialize and register new batadv link * @src_net: the applicable net namespace * @dev: network device to register * @tb: IFLA_INFO_DATA netlink attributes * @data: enum batadv_ifla_attrs attributes * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. 
*/ static int batadv_softif_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct batadv_priv *bat_priv = netdev_priv(dev); const char *algo_name; int err; if (data && data[IFLA_BATADV_ALGO_NAME]) { algo_name = nla_data(data[IFLA_BATADV_ALGO_NAME]); err = batadv_algo_select(bat_priv, algo_name); if (err) return -EINVAL; } return register_netdevice(dev); } /** * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via * netlink * @soft_iface: the to-be-removed batman-adv interface * @head: list pointer */ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, struct list_head *head) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *hard_iface; struct batadv_softif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface == soft_iface) batadv_hardif_disable_interface(hard_iface); } /* destroy the "untagged" VLAN */ vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (vlan) { batadv_softif_destroy_vlan(bat_priv, vlan); batadv_softif_vlan_put(vlan); } unregister_netdevice_queue(soft_iface, head); } /** * batadv_softif_is_valid() - Check whether device is a batadv soft interface * @net_dev: device which should be checked * * Return: true when net_dev is a batman-adv interface, false otherwise */ bool batadv_softif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) return true; return false; } static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = { [IFLA_BATADV_ALGO_NAME] = { .type = NLA_NUL_STRING }, }; struct rtnl_link_ops batadv_link_ops __read_mostly = { .kind = "batadv", .priv_size = sizeof(struct batadv_priv), .setup = batadv_softif_init_early, .maxtype = IFLA_BATADV_MAX, .policy = batadv_ifla_policy, .validate = batadv_softif_validate, .newlink = batadv_softif_newlink, .dellink = batadv_softif_destroy_netlink, };
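/*
 * Illustrative userspace sketch (not part of the kernel file above): the
 * counters exported through batadv_get_strings()/batadv_get_ethtool_stats()
 * can be read with the standard SIOCETHTOOL ioctl, mirroring what
 * "ethtool -S" does. A minimal sketch, assuming a soft interface named
 * "bat0"; error and allocation handling is abbreviated.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct {
		struct ethtool_sset_info hdr;
		__u32 buf[1];		/* receives the set size */
	} sset = { .hdr = { .cmd = ETHTOOL_GSSET_INFO,
			    .sset_mask = 1ULL << ETH_SS_STATS } };
	struct ethtool_gstrings *names;
	struct ethtool_stats *vals;
	struct ifreq ifr = { 0 };
	__u32 i, n;
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return 1;
	strncpy(ifr.ifr_name, "bat0", IFNAMSIZ - 1);

	/* 1) number of strings in the ETH_SS_STATS set */
	ifr.ifr_data = (void *)&sset;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0 || !sset.hdr.sset_mask)
		return 1;
	n = sset.buf[0];

	/* 2) the counter names ("tx", "tx_bytes", "forward", ...) */
	names = calloc(1, sizeof(*names) + n * ETH_GSTRING_LEN);
	names->cmd = ETHTOOL_GSTRINGS;
	names->string_set = ETH_SS_STATS;
	names->len = n;
	ifr.ifr_data = (void *)names;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return 1;

	/* 3) the values, each summed over all cpus by batadv_sum_counter() */
	vals = calloc(1, sizeof(*vals) + n * sizeof(__u64));
	vals->cmd = ETHTOOL_GSTATS;
	vals->n_stats = n;
	ifr.ifr_data = (void *)vals;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return 1;

	for (i = 0; i < n; i++)
		printf("%-32.32s %llu\n",
		       (char *)names->data + i * ETH_GSTRING_LEN,
		       (unsigned long long)vals->data[i]);
	return 0;
}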
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
 * Copyright (C) 2005-2006, Thomas Gleixner
 *
 * This file contains the IRQ-resend code
 *
 * If the interrupt is waiting to be processed, we try to re-run it.
 * We can't directly run it from here since the caller might be in an
 * interrupt-protected region. Not all irq controller chips can
 * retrigger interrupts at the hardware level, so in those cases
 * we allow the resending of IRQs via a tasklet.
 */

#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>

#include "internals.h"

#ifdef CONFIG_HARDIRQS_SW_RESEND

/* hlist_head to handle software resend of interrupts: */
static HLIST_HEAD(irq_resend_list);
static DEFINE_RAW_SPINLOCK(irq_resend_lock);

/*
 * Run software resends of IRQ's
 */
static void resend_irqs(struct tasklet_struct *unused)
{
	struct irq_desc *desc;

	raw_spin_lock_irq(&irq_resend_lock);
	while (!hlist_empty(&irq_resend_list)) {
		desc = hlist_entry(irq_resend_list.first, struct irq_desc,
				   resend_node);
		hlist_del_init(&desc->resend_node);
		raw_spin_unlock(&irq_resend_lock);
		desc->handle_irq(desc);
		raw_spin_lock(&irq_resend_lock);
	}
	raw_spin_unlock_irq(&irq_resend_lock);
}

/* Tasklet to handle resend: */
static DECLARE_TASKLET(resend_tasklet, resend_irqs);

static int irq_sw_resend(struct irq_desc *desc)
{
	/*
	 * Validate whether this interrupt can be safely injected from
	 * non interrupt context
	 */
	if (irqd_is_handle_enforce_irqctx(&desc->irq_data))
		return -EINVAL;

	/*
	 * If the interrupt is running in the thread context of the parent
	 * irq we need to be careful, because we cannot trigger it
	 * directly.
	 */
	if (irq_settings_is_nested_thread(desc)) {
		/*
		 * If the parent_irq is valid, we retrigger the parent,
		 * otherwise we do nothing.
*/ if (!desc->parent_irq) return -EINVAL; desc = irq_to_desc(desc->parent_irq); if (!desc) return -EINVAL; } /* Add to resend_list and activate the softirq: */ raw_spin_lock(&irq_resend_lock); if (hlist_unhashed(&desc->resend_node)) hlist_add_head(&desc->resend_node, &irq_resend_list); raw_spin_unlock(&irq_resend_lock); tasklet_schedule(&resend_tasklet); return 0; } void clear_irq_resend(struct irq_desc *desc) { raw_spin_lock(&irq_resend_lock); hlist_del_init(&desc->resend_node); raw_spin_unlock(&irq_resend_lock); } void irq_resend_init(struct irq_desc *desc) { INIT_HLIST_NODE(&desc->resend_node); } #else void clear_irq_resend(struct irq_desc *desc) {} void irq_resend_init(struct irq_desc *desc) {} static int irq_sw_resend(struct irq_desc *desc) { return -EINVAL; } #endif static int try_retrigger(struct irq_desc *desc) { if (desc->irq_data.chip->irq_retrigger) return desc->irq_data.chip->irq_retrigger(&desc->irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY return irq_chip_retrigger_hierarchy(&desc->irq_data); #else return 0; #endif } /* * IRQ resend * * Is called with interrupts disabled and desc->lock held. */ int check_irq_resend(struct irq_desc *desc, bool inject) { int err = 0; /* * We do not resend level type interrupts. Level type interrupts * are resent by hardware when they are still active. Clear the * pending bit so suspend/resume does not get confused. */ if (irq_settings_is_level(desc)) { desc->istate &= ~IRQS_PENDING; return -EINVAL; } if (desc->istate & IRQS_REPLAY) return -EBUSY; if (!(desc->istate & IRQS_PENDING) && !inject) return 0; desc->istate &= ~IRQS_PENDING; if (!try_retrigger(desc)) err = irq_sw_resend(desc); /* If the retrigger was successful, mark it with the REPLAY bit */ if (!err) desc->istate |= IRQS_REPLAY; return err; } #ifdef CONFIG_GENERIC_IRQ_INJECTION /** * irq_inject_interrupt - Inject an interrupt for testing/error injection * @irq: The interrupt number * * This function must only be used for debug and testing purposes! * * Especially on x86 this can cause a premature completion of an interrupt * affinity change causing the interrupt line to become stale. Very * unlikely, but possible. * * The injection can fail for various reasons: * - Interrupt is not activated * - Interrupt is NMI type or currently replaying * - Interrupt is level type * - Interrupt does not support hardware retrigger and software resend is * either not enabled or not possible for the interrupt. */ int irq_inject_interrupt(unsigned int irq) { struct irq_desc *desc; unsigned long flags; int err; /* Try the state injection hardware interface first */ if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true)) return 0; /* That failed, try via the resend mechanism */ desc = irq_get_desc_buslock(irq, &flags, 0); if (!desc) return -EINVAL; /* * Only try to inject when the interrupt is: * - not NMI type * - activated */ if (irq_is_nmi(desc) || !irqd_is_activated(&desc->irq_data)) err = -EINVAL; else err = check_irq_resend(desc, true); irq_put_desc_busunlock(desc, flags); return err; } EXPORT_SYMBOL_GPL(irq_inject_interrupt); #endif
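/*
 * Illustrative userspace sketch (not part of the file above): with
 * CONFIG_GENERIC_IRQ_DEBUGFS enabled (which selects
 * CONFIG_GENERIC_IRQ_INJECTION), writing the string "trigger" to the
 * per-interrupt debugfs file ends up in irq_inject_interrupt(). A minimal
 * helper, assuming debugfs is mounted at /sys/kernel/debug and the caller
 * has root privileges; equivalent to "echo trigger > .../irq/irqs/<n>".
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int inject_irq(unsigned int irq)
{
	char path[64];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "/sys/kernel/debug/irq/irqs/%u", irq);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;

	/* may legitimately fail, e.g. for level type or NMI interrupts */
	n = write(fd, "trigger", 7);
	close(fd);

	return n == 7 ? 0 : -1;
}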
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * proc.c - procfs support for Protocol family CAN core module
 *
 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Volkswagen nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * Alternatively, provided that this notice is retained in full, this
 * software may be distributed under the terms of the GNU General
 * Public License ("GPL") version 2, in which case the provisions of the
 * GPL apply INSTEAD OF those given above.
 *
 * The provided data structures and external interfaces from this code
 * are not restricted to be used by modules with a GPL compatible license.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/if_arp.h> #include <linux/can/can-ml.h> #include <linux/can/core.h> #include "af_can.h" /* * proc filenames for the PF_CAN core */ #define CAN_PROC_STATS "stats" #define CAN_PROC_RESET_STATS "reset_stats" #define CAN_PROC_RCVLIST_ALL "rcvlist_all" #define CAN_PROC_RCVLIST_FIL "rcvlist_fil" #define CAN_PROC_RCVLIST_INV "rcvlist_inv" #define CAN_PROC_RCVLIST_SFF "rcvlist_sff" #define CAN_PROC_RCVLIST_EFF "rcvlist_eff" #define CAN_PROC_RCVLIST_ERR "rcvlist_err" static int user_reset; static const char rx_list_name[][8] = { [RX_ERR] = "rx_err", [RX_ALL] = "rx_all", [RX_FIL] = "rx_fil", [RX_INV] = "rx_inv", }; /* * af_can statistics stuff */ static void can_init_stats(struct net *net) { struct can_pkg_stats *pkg_stats = net->can.pkg_stats; struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; /* * This memset function is called from a timer context (when * can_stattimer is active which is the default) OR in a process * context (reading the proc_fs when can_stattimer is disabled). */ memset(pkg_stats, 0, sizeof(struct can_pkg_stats)); pkg_stats->jiffies_init = jiffies; rcv_lists_stats->stats_reset++; if (user_reset) { user_reset = 0; rcv_lists_stats->user_reset++; } } static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, unsigned long count) { if (oldjif == newjif) return 0; /* see can_stat_update() - this should NEVER happen! */ if (count > (ULONG_MAX / HZ)) { printk(KERN_ERR "can: calc_rate: count exceeded! 
%ld\n", count); return 99999999; } return (count * HZ) / (newjif - oldjif); } void can_stat_update(struct timer_list *t) { struct net *net = from_timer(net, t, can.stattimer); struct can_pkg_stats *pkg_stats = net->can.pkg_stats; unsigned long j = jiffies; /* snapshot */ /* restart counting in timer context on user request */ if (user_reset) can_init_stats(net); /* restart counting on jiffies overflow */ if (j < pkg_stats->jiffies_init) can_init_stats(net); /* prevent overflow in calc_rate() */ if (pkg_stats->rx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* prevent overflow in calc_rate() */ if (pkg_stats->tx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* matches overflow - very improbable */ if (pkg_stats->matches > (ULONG_MAX / 100)) can_init_stats(net); /* calc total values */ if (pkg_stats->rx_frames) pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) / pkg_stats->rx_frames; pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j, pkg_stats->tx_frames); pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j, pkg_stats->rx_frames); /* calc current values */ if (pkg_stats->rx_frames_delta) pkg_stats->current_rx_match_ratio = (pkg_stats->matches_delta * 100) / pkg_stats->rx_frames_delta; pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta); pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta); /* check / update maximum values */ if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate) pkg_stats->max_tx_rate = pkg_stats->current_tx_rate; if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate) pkg_stats->max_rx_rate = pkg_stats->current_rx_rate; if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio) pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio; /* clear values for 'current rate' calculation */ pkg_stats->tx_frames_delta = 0; pkg_stats->rx_frames_delta = 0; pkg_stats->matches_delta = 0; /* restart timer (one second) */ mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } /* * proc read functions */ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, struct net_device *dev) { struct receiver *r; hlist_for_each_entry_rcu(r, rx_list, list) { char *fmt = (r->can_id & CAN_EFF_FLAG)? " %-5s %08x %08x %pK %pK %8ld %s\n" : " %-5s %03x %08x %pK %pK %8ld %s\n"; seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask, r->func, r->data, r->matches, r->ident); } } static void can_print_recv_banner(struct seq_file *m) { /* * can1. 00000000 00000000 00000000 * ....... 
0 tp20 */ if (IS_ENABLED(CONFIG_64BIT)) seq_puts(m, " device can_id can_mask function userdata matches ident\n"); else seq_puts(m, " device can_id can_mask function userdata matches ident\n"); } static int can_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; struct can_pkg_stats *pkg_stats = net->can.pkg_stats; struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; seq_putc(m, '\n'); seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames); seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames); seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches); seq_putc(m, '\n'); if (net->can.stattimer.function == can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", pkg_stats->total_rx_match_ratio); seq_printf(m, " %8ld frames/s total tx rate (TXR)\n", pkg_stats->total_tx_rate); seq_printf(m, " %8ld frames/s total rx rate (RXR)\n", pkg_stats->total_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% current match ratio (CRXMR)\n", pkg_stats->current_rx_match_ratio); seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n", pkg_stats->current_tx_rate); seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n", pkg_stats->current_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% max match ratio (MRXMR)\n", pkg_stats->max_rx_match_ratio); seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n", pkg_stats->max_tx_rate); seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n", pkg_stats->max_rx_rate); seq_putc(m, '\n'); } seq_printf(m, " %8ld current receive list entries (CRCV)\n", rcv_lists_stats->rcv_entries); seq_printf(m, " %8ld maximum receive list entries (MRCV)\n", rcv_lists_stats->rcv_entries_max); if (rcv_lists_stats->stats_reset) seq_printf(m, "\n %8ld statistic resets (STR)\n", rcv_lists_stats->stats_reset); if (rcv_lists_stats->user_reset) seq_printf(m, " %8ld user statistic resets (USTR)\n", rcv_lists_stats->user_reset); seq_putc(m, '\n'); return 0; } static int can_reset_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; struct can_pkg_stats *pkg_stats = net->can.pkg_stats; user_reset = 1; if (net->can.stattimer.function == can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", rcv_lists_stats->stats_reset + 1); } else { if (pkg_stats->jiffies_init != jiffies) can_init_stats(net); seq_printf(m, "Performed statistic reset #%ld.\n", rcv_lists_stats->stats_reset); } return 0; } static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, struct net_device *dev, struct can_dev_rcv_lists *dev_rcv_lists) { if (!hlist_empty(&dev_rcv_lists->rx[idx])) { can_print_recv_banner(m); can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev); } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); } static int can_rcvlist_proc_show(struct seq_file *m, void *v) { /* double cast to prevent GCC warning */ int idx = (int)(long)pde_data(m->file->f_inode); struct net_device *dev; struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ dev_rcv_lists = net->can.rx_alldev_list; can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists); /* receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { struct can_ml_priv *can_ml = can_get_ml_priv(dev); if (can_ml) can_rcvlist_proc_show_one(m, idx, dev, &can_ml->dev_rcv_lists); } rcu_read_unlock(); 
seq_putc(m, '\n'); return 0; } static inline void can_rcvlist_proc_show_array(struct seq_file *m, struct net_device *dev, struct hlist_head *rcv_array, unsigned int rcv_array_sz) { unsigned int i; int all_empty = 1; /* check whether at least one list is non-empty */ for (i = 0; i < rcv_array_sz; i++) if (!hlist_empty(&rcv_array[i])) { all_empty = 0; break; } if (!all_empty) { can_print_recv_banner(m); for (i = 0; i < rcv_array_sz; i++) { if (!hlist_empty(&rcv_array[i])) can_print_rcvlist(m, &rcv_array[i], dev); } } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); } static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_SFF */ seq_puts(m, "\nreceive list 'rx_sff':\n"); rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ dev_rcv_lists = net->can.rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff, ARRAY_SIZE(dev_rcv_lists->rx_sff)); /* sff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { struct can_ml_priv *can_ml = can_get_ml_priv(dev); if (can_ml) { dev_rcv_lists = &can_ml->dev_rcv_lists; can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff, ARRAY_SIZE(dev_rcv_lists->rx_sff)); } } rcu_read_unlock(); seq_putc(m, '\n'); return 0; } static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_EFF */ seq_puts(m, "\nreceive list 'rx_eff':\n"); rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ dev_rcv_lists = net->can.rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff, ARRAY_SIZE(dev_rcv_lists->rx_eff)); /* eff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { struct can_ml_priv *can_ml = can_get_ml_priv(dev); if (can_ml) { dev_rcv_lists = &can_ml->dev_rcv_lists; can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff, ARRAY_SIZE(dev_rcv_lists->rx_eff)); } } rcu_read_unlock(); seq_putc(m, '\n'); return 0; } /* * can_init_proc - create main CAN proc directory and procfs entries */ void can_init_proc(struct net *net) { /* create /proc/net/can directory */ net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net); if (!net->can.proc_dir) { printk(KERN_INFO "can: failed to create /proc/net/can . 
" "CONFIG_PROC_FS missing?\n"); return; } /* own procfs entries from the AF_CAN core */ net->can.pde_stats = proc_create_net_single(CAN_PROC_STATS, 0644, net->can.proc_dir, can_stats_proc_show, NULL); net->can.pde_reset_stats = proc_create_net_single(CAN_PROC_RESET_STATS, 0644, net->can.proc_dir, can_reset_stats_proc_show, NULL); net->can.pde_rcvlist_err = proc_create_net_single(CAN_PROC_RCVLIST_ERR, 0644, net->can.proc_dir, can_rcvlist_proc_show, (void *)RX_ERR); net->can.pde_rcvlist_all = proc_create_net_single(CAN_PROC_RCVLIST_ALL, 0644, net->can.proc_dir, can_rcvlist_proc_show, (void *)RX_ALL); net->can.pde_rcvlist_fil = proc_create_net_single(CAN_PROC_RCVLIST_FIL, 0644, net->can.proc_dir, can_rcvlist_proc_show, (void *)RX_FIL); net->can.pde_rcvlist_inv = proc_create_net_single(CAN_PROC_RCVLIST_INV, 0644, net->can.proc_dir, can_rcvlist_proc_show, (void *)RX_INV); net->can.pde_rcvlist_eff = proc_create_net_single(CAN_PROC_RCVLIST_EFF, 0644, net->can.proc_dir, can_rcvlist_eff_proc_show, NULL); net->can.pde_rcvlist_sff = proc_create_net_single(CAN_PROC_RCVLIST_SFF, 0644, net->can.proc_dir, can_rcvlist_sff_proc_show, NULL); } /* * can_remove_proc - remove procfs entries and main CAN proc directory */ void can_remove_proc(struct net *net) { if (!net->can.proc_dir) return; if (net->can.pde_stats) remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir); if (net->can.pde_reset_stats) remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir); if (net->can.pde_rcvlist_err) remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir); if (net->can.pde_rcvlist_all) remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir); if (net->can.pde_rcvlist_fil) remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir); if (net->can.pde_rcvlist_inv) remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir); if (net->can.pde_rcvlist_eff) remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir); if (net->can.pde_rcvlist_sff) remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); remove_proc_entry("can", net->proc_net); }
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2014 Fraunhofer ITWM
 *
 * Written by:
 * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
 */

#include <linux/ieee802154.h>

#include <net/mac802154.h>
#include <net/ieee802154_netdev.h>

static int
ieee802154_hdr_push_addr(u8 *buf, const struct ieee802154_addr *addr,
			 bool omit_pan)
{
	int pos = 0;

	if (addr->mode == IEEE802154_ADDR_NONE)
		return 0;

	if (!omit_pan) {
		memcpy(buf + pos, &addr->pan_id, 2);
		pos += 2;
	}

	switch (addr->mode) {
	case IEEE802154_ADDR_SHORT:
		memcpy(buf + pos, &addr->short_addr, 2);
		pos += 2;
		break;

	case IEEE802154_ADDR_LONG:
		memcpy(buf + pos, &addr->extended_addr, IEEE802154_ADDR_LEN);
		pos += IEEE802154_ADDR_LEN;
		break;

	default:
		return -EINVAL;
	}

	return pos;
}

static int
ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr)
{
	int pos = 5;

	memcpy(buf, hdr, 1);
	memcpy(buf + 1, &hdr->frame_counter, 4);

	switch (hdr->key_id_mode) {
	case IEEE802154_SCF_KEY_IMPLICIT:
		return pos;

	case IEEE802154_SCF_KEY_INDEX:
		break;

	case IEEE802154_SCF_KEY_SHORT_INDEX:
		memcpy(buf + pos, &hdr->short_src, 4);
		pos += 4;
		break;

	case IEEE802154_SCF_KEY_HW_INDEX:
		memcpy(buf + pos, &hdr->extended_src, IEEE802154_ADDR_LEN);
		pos += IEEE802154_ADDR_LEN;
		break;
	}

	buf[pos++] = hdr->key_id;

	return pos;
}

int
ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
	u8 buf[IEEE802154_MAX_HEADER_LEN];
	int pos = 2;
	int rc;
	struct ieee802154_hdr_fc *fc = &hdr->fc;

	buf[pos++] = hdr->seq;

	fc->dest_addr_mode = hdr->dest.mode;

	rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false);
	if (rc < 0)
		return -EINVAL;
	pos += rc;

	fc->source_addr_mode = hdr->source.mode;

	if (hdr->source.pan_id == hdr->dest.pan_id &&
	    hdr->dest.mode != IEEE802154_ADDR_NONE)
		fc->intra_pan = true;

	rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc->intra_pan);
	if (rc < 0)
		return -EINVAL;
	pos += rc;

	if (fc->security_enabled) {
		fc->version = 1;

		rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec);
		if (rc < 0)
			return -EINVAL;

		pos += rc;
	}

	memcpy(buf, fc, 2);
memcpy(skb_push(skb, pos), buf, pos); return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_push); int ieee802154_mac_cmd_push(struct sk_buff *skb, void *f, const void *pl, unsigned int pl_len) { struct ieee802154_mac_cmd_frame *frame = f; struct ieee802154_mac_cmd_pl *mac_pl = &frame->mac_pl; struct ieee802154_hdr *mhr = &frame->mhr; int ret; skb_reserve(skb, sizeof(*mhr)); ret = ieee802154_hdr_push(skb, mhr); if (ret < 0) return ret; skb_reset_mac_header(skb); skb->mac_len = ret; skb_put_data(skb, mac_pl, sizeof(*mac_pl)); skb_put_data(skb, pl, pl_len); return 0; } EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_push); int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon) { struct ieee802154_beacon_hdr *mac_pl = &beacon->mac_pl; struct ieee802154_hdr *mhr = &beacon->mhr; int ret; skb_reserve(skb, sizeof(*mhr)); ret = ieee802154_hdr_push(skb, mhr); if (ret < 0) return ret; skb_reset_mac_header(skb); skb->mac_len = ret; skb_put_data(skb, mac_pl, sizeof(*mac_pl)); if (mac_pl->pend_short_addr_count || mac_pl->pend_ext_addr_count) return -EOPNOTSUPP; return 0; } EXPORT_SYMBOL_GPL(ieee802154_beacon_push); static int ieee802154_hdr_get_addr(const u8 *buf, int mode, bool omit_pan, struct ieee802154_addr *addr) { int pos = 0; addr->mode = mode; if (mode == IEEE802154_ADDR_NONE) return 0; if (!omit_pan) { memcpy(&addr->pan_id, buf + pos, 2); pos += 2; } if (mode == IEEE802154_ADDR_SHORT) { memcpy(&addr->short_addr, buf + pos, 2); return pos + 2; } else { memcpy(&addr->extended_addr, buf + pos, IEEE802154_ADDR_LEN); return pos + IEEE802154_ADDR_LEN; } } static int ieee802154_hdr_addr_len(int mode, bool omit_pan) { int pan_len = omit_pan ? 0 : 2; switch (mode) { case IEEE802154_ADDR_NONE: return 0; case IEEE802154_ADDR_SHORT: return 2 + pan_len; case IEEE802154_ADDR_LONG: return IEEE802154_ADDR_LEN + pan_len; default: return -EINVAL; } } static int ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr) { int pos = 5; memcpy(hdr, buf, 1); memcpy(&hdr->frame_counter, buf + 1, 4); switch (hdr->key_id_mode) { case IEEE802154_SCF_KEY_IMPLICIT: return pos; case IEEE802154_SCF_KEY_INDEX: break; case IEEE802154_SCF_KEY_SHORT_INDEX: memcpy(&hdr->short_src, buf + pos, 4); pos += 4; break; case IEEE802154_SCF_KEY_HW_INDEX: memcpy(&hdr->extended_src, buf + pos, IEEE802154_ADDR_LEN); pos += IEEE802154_ADDR_LEN; break; } hdr->key_id = buf[pos++]; return pos; } static int ieee802154_sechdr_lengths[4] = { [IEEE802154_SCF_KEY_IMPLICIT] = 5, [IEEE802154_SCF_KEY_INDEX] = 6, [IEEE802154_SCF_KEY_SHORT_INDEX] = 10, [IEEE802154_SCF_KEY_HW_INDEX] = 14, }; static int ieee802154_hdr_sechdr_len(u8 sc) { return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)]; } static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr) { int dlen, slen; dlen = ieee802154_hdr_addr_len(hdr->fc.dest_addr_mode, false); slen = ieee802154_hdr_addr_len(hdr->fc.source_addr_mode, hdr->fc.intra_pan); if (slen < 0 || dlen < 0) return -EINVAL; return 3 + dlen + slen + hdr->fc.security_enabled; } static int ieee802154_hdr_get_addrs(const u8 *buf, struct ieee802154_hdr *hdr) { int pos = 0; pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.dest_addr_mode, false, &hdr->dest); pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.source_addr_mode, hdr->fc.intra_pan, &hdr->source); if (hdr->fc.intra_pan) hdr->source.pan_id = hdr->dest.pan_id; return pos; } int ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr) { int pos = 3, rc; if (!pskb_may_pull(skb, 3)) return -EINVAL; memcpy(hdr, skb->data, 
    rc = ieee802154_hdr_minlen(hdr);
    if (rc < 0 || !pskb_may_pull(skb, rc))
        return -EINVAL;

    pos += ieee802154_hdr_get_addrs(skb->data + pos, hdr);

    if (hdr->fc.security_enabled) {
        int want = pos + ieee802154_hdr_sechdr_len(skb->data[pos]);

        if (!pskb_may_pull(skb, want))
            return -EINVAL;

        pos += ieee802154_hdr_get_sechdr(skb->data + pos, &hdr->sec);
    }

    skb_pull(skb, pos);
    return pos;
}
EXPORT_SYMBOL_GPL(ieee802154_hdr_pull);

int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb,
                               struct ieee802154_mac_cmd_pl *mac_pl)
{
    if (!pskb_may_pull(skb, sizeof(*mac_pl)))
        return -EINVAL;

    memcpy(mac_pl, skb->data, sizeof(*mac_pl));
    skb_pull(skb, sizeof(*mac_pl));

    return 0;
}
EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_pl_pull);

int
ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
    const u8 *buf = skb_mac_header(skb);
    int pos = 3, rc;

    if (buf + 3 > skb_tail_pointer(skb))
        return -EINVAL;

    memcpy(hdr, buf, 3);

    rc = ieee802154_hdr_minlen(hdr);
    if (rc < 0 || buf + rc > skb_tail_pointer(skb))
        return -EINVAL;

    pos += ieee802154_hdr_get_addrs(buf + pos, hdr);
    return pos;
}
EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs);

int
ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
    const u8 *buf = skb_mac_header(skb);
    int pos;

    pos = ieee802154_hdr_peek_addrs(skb, hdr);
    if (pos < 0)
        return -EINVAL;

    if (hdr->fc.security_enabled) {
        u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos));
        int want = pos + ieee802154_sechdr_lengths[key_id_mode];

        if (buf + want > skb_tail_pointer(skb))
            return -EINVAL;

        pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec);
    }

    return pos;
}
EXPORT_SYMBOL_GPL(ieee802154_hdr_peek);

int ieee802154_max_payload(const struct ieee802154_hdr *hdr)
{
    int hlen = ieee802154_hdr_minlen(hdr);

    if (hdr->fc.security_enabled) {
        hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1;
        hlen += ieee802154_sechdr_authtag_len(&hdr->sec);
    }

    return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE;
}
EXPORT_SYMBOL_GPL(ieee802154_max_payload);
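The two exported entry points above are symmetric: ieee802154_hdr_push() serializes an ieee802154_hdr in front of an skb, and ieee802154_hdr_pull() parses and strips it again. The following is a minimal round-trip sketch, not part of the file: it assumes a kernel context with the same headers included as above, and everything named *_demo plus the constants used to fill the address fields are hypothetical, for illustration only.

    static int __maybe_unused ieee802154_hdr_roundtrip_demo(void)
    {
        struct ieee802154_hdr out = {}, in = {};
        struct sk_buff *skb;
        int pushed, pulled;

        out.seq = 42;
        out.dest.mode = IEEE802154_ADDR_SHORT;
        out.dest.pan_id = cpu_to_le16(0x1234);
        out.dest.short_addr = cpu_to_le16(0x0001);
        out.source.mode = IEEE802154_ADDR_SHORT;
        out.source.pan_id = cpu_to_le16(0x1234);   /* same PAN -> intra_pan */
        out.source.short_addr = cpu_to_le16(0x0002);

        skb = alloc_skb(IEEE802154_MAX_HEADER_LEN + 16, GFP_KERNEL);
        if (!skb)
            return -ENOMEM;
        /* headroom so ieee802154_hdr_push() can skb_push() the header */
        skb_reserve(skb, IEEE802154_MAX_HEADER_LEN);

        pushed = ieee802154_hdr_push(skb, &out);
        pulled = ieee802154_hdr_pull(skb, &in);

        /* On success both return the header length; in mirrors out. */
        kfree_skb(skb);
        return (pushed > 0 && pushed == pulled) ? 0 : -EINVAL;
    }

Note that intra-PAN compression is exercised automatically here: because both addresses share a PAN id, the source PAN is omitted on the wire by the push side and restored from the destination PAN by the pull side.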
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Bridge per vlan tunnel port dst_metadata netlink control interface
 *
 * Authors:
 * Roopa Prabhu <roopa@cumulusnetworks.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <uapi/linux/if_bridge.h>
#include <net/dst_metadata.h>

#include "br_private.h"
#include "br_private_tunnel.h"

static size_t __get_vlan_tinfo_size(void)
{
    return nla_total_size(0) + /* nest IFLA_BRIDGE_VLAN_TUNNEL_INFO */
           nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_VLAN_TUNNEL_ID */
           nla_total_size(sizeof(u16)) + /* IFLA_BRIDGE_VLAN_TUNNEL_VID */
           nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */
}

bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
                        const struct net_bridge_vlan *v_last)
{
    __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id);
    __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id);

    return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_last)) == 1;
}

static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg)
{
    struct net_bridge_vlan *v, *vtbegin = NULL, *vtend = NULL;
    int num_tinfos = 0;

    /* Count number of vlan infos */
    list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
        /* only a context, bridge vlan not activated */
        if (!br_vlan_should_use(v) || !v->tinfo.tunnel_id)
            continue;

        if (!vtbegin) {
            goto initvars;
        } else if ((v->vid - vtend->vid) == 1 &&
                   vlan_tunid_inrange(v, vtend)) {
            vtend = v;
            continue;
        } else {
            if ((vtend->vid - vtbegin->vid) > 0)
                num_tinfos += 2;
            else
                num_tinfos += 1;
        }
initvars:
        vtbegin = v;
        vtend = v;
    }

    if (vtbegin && vtend) {
        if ((vtend->vid - vtbegin->vid) > 0)
            num_tinfos += 2;
        else
            num_tinfos += 1;
    }

    return num_tinfos;
}

int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg)
{
    int num_tinfos;

    if (!vg)
        return 0;

    rcu_read_lock();
    num_tinfos = __get_num_vlan_tunnel_infos(vg);
    rcu_read_unlock();

    return num_tinfos * __get_vlan_tinfo_size();
}

static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid,
                              __be64 tunnel_id, u16 flags)
{
    __be32 tid = tunnel_id_to_key32(tunnel_id);
    struct nlattr *tmap;

    tmap = nla_nest_start_noflag(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO);
    if (!tmap)
        return -EMSGSIZE;
    if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID,
                    be32_to_cpu(tid)))
        goto nla_put_failure;
    if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_VID, vid))
        goto nla_put_failure;
    if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, flags))
        goto nla_put_failure;
    nla_nest_end(skb, tmap);

    return 0;

nla_put_failure:
    nla_nest_cancel(skb, tmap);

    return -EMSGSIZE;
}

static int br_fill_vlan_tinfo_range(struct sk_buff *skb,
                                    struct net_bridge_vlan *vtbegin,
                                    struct net_bridge_vlan *vtend)
{
    int err;

    if (vtend && (vtend->vid - vtbegin->vid) > 0) {
        /* add range to skb */
        err = br_fill_vlan_tinfo(skb, vtbegin->vid,
                                 vtbegin->tinfo.tunnel_id,
                                 BRIDGE_VLAN_INFO_RANGE_BEGIN);
        if (err)
            return err;

        err = br_fill_vlan_tinfo(skb, vtend->vid,
                                 vtend->tinfo.tunnel_id,
                                 BRIDGE_VLAN_INFO_RANGE_END);
        if (err)
            return err;
    } else {
        err = br_fill_vlan_tinfo(skb, vtbegin->vid,
                                 vtbegin->tinfo.tunnel_id, 0);
        if (err)
            return err;
    }

    return 0;
}

int br_fill_vlan_tunnel_info(struct sk_buff *skb,
                             struct net_bridge_vlan_group *vg)
{
    struct net_bridge_vlan *vtbegin = NULL;
    struct net_bridge_vlan *vtend = NULL;
    struct net_bridge_vlan *v;
    int err;

    /* Count number of vlan infos */
    list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
        /* only a context, bridge vlan not activated */
        if (!br_vlan_should_use(v))
            continue;

        if (!v->tinfo.tunnel_dst)
            continue;

        if (!vtbegin) {
            goto initvars;
        } else if ((v->vid - vtend->vid) == 1 &&
                   vlan_tunid_inrange(v, vtend)) {
            vtend = v;
            continue;
        } else {
            err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend);
            if (err)
                return err;
        }
initvars:
        vtbegin = v;
        vtend = v;
    }

    if (vtbegin) {
        err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend);
        if (err)
            return err;
    }

    return 0;
}

static const struct nla_policy
vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = {
    [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = {
        .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1
    },
    [IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 },
    [IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 },
    [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 },
};

int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd,
                        u16 vid, u32 tun_id, bool *changed)
{
    int err = 0;

    if (!p)
        return -EINVAL;

    switch (cmd) {
    case RTM_SETLINK:
        err = nbp_vlan_tunnel_info_add(p, vid, tun_id);
        if (!err)
            *changed = true;
        break;
    case RTM_DELLINK:
        if (!nbp_vlan_tunnel_info_delete(p, vid))
            *changed = true;
        break;
    }

    return err;
}

int br_parse_vlan_tunnel_info(struct nlattr *attr,
                              struct vtunnel_info *tinfo)
{
    struct nlattr *tb[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1];
    u32 tun_id;
    u16 vid, flags = 0;
    int err;

    memset(tinfo, 0, sizeof(*tinfo));

    err = nla_parse_nested_deprecated(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX,
                                      attr, vlan_tunnel_policy, NULL);
    if (err < 0)
        return err;

    if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] ||
        !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID])
        return -EINVAL;

    tun_id = nla_get_u32(tb[IFLA_BRIDGE_VLAN_TUNNEL_ID]);
    vid = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]);
    if (vid >= VLAN_VID_MASK)
        return -ERANGE;

    if (tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS])
        flags = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]);

    tinfo->tunid = tun_id;
    tinfo->vid = vid;
    tinfo->flags = flags;

    return 0;
}

/* send a notification if v_curr can't enter the range and start a new one */
static void __vlan_tunnel_handle_range(const struct net_bridge_port *p,
                                       struct net_bridge_vlan **v_start,
                                       struct net_bridge_vlan **v_end,
                                       int v_curr, bool curr_change)
{
    struct net_bridge_vlan_group *vg;
    struct net_bridge_vlan *v;

    vg = nbp_vlan_group(p);
    if (!vg)
        return;

    v = br_vlan_find(vg, v_curr);
    if (!*v_start)
        goto out_init;

    if (v && curr_change && br_vlan_can_enter_range(v, *v_end)) {
        *v_end = v;
        return;
    }

    br_vlan_notify(p->br, p, (*v_start)->vid, (*v_end)->vid, RTM_NEWVLAN);
out_init:
    /* we start a range only if there are any changes to notify about */
    *v_start = curr_change ? v : NULL;
    *v_end = *v_start;
}

int br_process_vlan_tunnel_info(const struct net_bridge *br,
                                const struct net_bridge_port *p, int cmd,
                                struct vtunnel_info *tinfo_curr,
                                struct vtunnel_info *tinfo_last,
                                bool *changed)
{
    int err;

    if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
        if (tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN)
            return -EINVAL;
        memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info));
    } else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) {
        struct net_bridge_vlan *v_start = NULL, *v_end = NULL;
        int t, v;

        if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN))
            return -EINVAL;
        if ((tinfo_curr->vid - tinfo_last->vid) !=
            (tinfo_curr->tunid - tinfo_last->tunid))
            return -EINVAL;

        t = tinfo_last->tunid;
        for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
            bool curr_change = false;

            err = br_vlan_tunnel_info(p, cmd, v, t, &curr_change);
            if (err)
                break;
            t++;
            if (curr_change)
                *changed = curr_change;
            __vlan_tunnel_handle_range(p, &v_start, &v_end, v,
                                       curr_change);
        }
        if (v_start && v_end)
            br_vlan_notify(br, p, v_start->vid, v_end->vid,
                           RTM_NEWVLAN);
        if (err)
            return err;

        memset(tinfo_last, 0, sizeof(struct vtunnel_info));
        memset(tinfo_curr, 0, sizeof(struct vtunnel_info));
    } else {
        if (tinfo_last->flags)
            return -EINVAL;
        err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid,
                                  tinfo_curr->tunid, changed);
        if (err)
            return err;
        br_vlan_notify(br, p, tinfo_curr->vid, 0, RTM_NEWVLAN);
        memset(tinfo_last, 0, sizeof(struct vtunnel_info));
        memset(tinfo_curr, 0, sizeof(struct vtunnel_info));
    }

    return 0;
}
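A VID range in this interface is only accepted when the VID span and the tunnel-id span match, as checked at the top of br_process_vlan_tunnel_info(); each VID in the range is then bound to consecutive tunnel ids. A small stand-alone sketch of that mapping follows (illustration only, with made-up numbers; not part of the file):

    #include <stdio.h>

    int main(void)
    {
        unsigned int vid_begin = 100, vid_end = 104;
        unsigned int tunid_begin = 1000;

        /* Mirrors the loop in br_process_vlan_tunnel_info(): the range
         * is only valid when (vid_end - vid_begin) equals
         * (tunid_end - tunid_begin), so each VID maps to exactly one
         * tunnel id. */
        for (unsigned int v = vid_begin, t = tunid_begin; v <= vid_end;
             v++, t++)
            printf("vid %u -> tunnel id %u\n", v, t);

        return 0;
    }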
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/mm/swapfile.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 * Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/shmem_fs.h>
#include <linux/blk-cgroup.h>
#include <linux/random.h>
#include <linux/writeback.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/mutex.h>
#include <linux/capability.h>
#include <linux/syscalls.h>
#include <linux/memcontrol.h>
#include <linux/poll.h>
#include <linux/oom.h>
#include <linux/swapfile.h>
#include <linux/export.h>
#include <linux/swap_slots.h>
#include <linux/sort.h>
#include <linux/completion.h>
#include <linux/suspend.h>
#include <linux/zswap.h>
#include <linux/plist.h>

#include <asm/tlbflush.h>
#include <linux/swapops.h>
#include <linux/swap_cgroup.h>
#include "internal.h"
#include "swap.h"

static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
                                 unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
static void swap_entry_range_free(struct swap_info_struct *si,
                                  struct swap_cluster_info *ci,
                                  swp_entry_t entry, unsigned int nr_pages);
static void swap_range_alloc(struct swap_info_struct *si,
                             unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
                                              unsigned long offset);
static inline void unlock_cluster(struct swap_cluster_info *ci);

static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
atomic_long_t nr_swap_pages;
/*
 * Some modules use swappable objects and may try to swap them out under
 * memory pressure (via the shrinker).
 * Before doing so, they may wish to check whether any swap space is
 * available.
 */
EXPORT_SYMBOL_GPL(nr_swap_pages);
/* Protected with swap_lock. Reading in vm_swap_full() doesn't need the lock. */
long total_swap_pages;
static int least_priority = -1;
unsigned long swapfile_maximum_size;
#ifdef CONFIG_MIGRATION
bool swap_migration_ad_supported;
#endif /* CONFIG_MIGRATION */

static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

/*
 * all active swap_info_structs
 * protected with swap_lock, and ordered by priority.
 */
static PLIST_HEAD(swap_active_head);

/*
 * all available (active, not full) swap_info_structs
 * protected with swap_avail_lock, ordered by priority.
 * This is used by folio_alloc_swap() instead of swap_active_head
 * because swap_active_head includes all swap_info_structs,
 * but folio_alloc_swap() doesn't need to look at full ones.
 * This uses its own lock instead of swap_lock because when a
 * swap_info_struct changes between not-full/full, it needs to
 * add/remove itself to/from this list, but the swap_info_struct->lock
 * is held and the locking order requires swap_lock to be taken
 * before any swap_info_struct->lock.
 */
static struct plist_head *swap_avail_heads;
static DEFINE_SPINLOCK(swap_avail_lock);

static struct swap_info_struct *swap_info[MAX_SWAPFILES];

static DEFINE_MUTEX(swapon_mutex);

static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
/* Activity counter to indicate that a swapon or swapoff has occurred */
static atomic_t proc_poll_event = ATOMIC_INIT(0);

atomic_t nr_rotate_swap = ATOMIC_INIT(0);

static struct swap_info_struct *swap_type_to_swap_info(int type)
{
    if (type >= MAX_SWAPFILES)
        return NULL;

    return READ_ONCE(swap_info[type]); /* rcu_dereference() */
}

static inline unsigned char swap_count(unsigned char ent)
{
    return ent & ~SWAP_HAS_CACHE;  /* may include COUNT_CONTINUED flag */
}

/*
 * Use the second highest bit of inuse_pages counter as the indicator
 * if one swap device is on the available plist, so the atomic can
 * still be updated arithmetically while having special data embedded.
 *
 * inuse_pages counter is the only thing indicating if a device should
 * be on avail_lists or not (except swapon / swapoff). By embedding the
 * off-list bit in the atomic counter, updates no longer need any lock
 * to check the list status.
 *
 * This bit will be set if the device is not on the plist and not
 * usable, will be cleared if the device is on the plist.
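 *
 * For example (illustration only, not from the original source): with a
 * 32-bit atomic_t, BITS_PER_TYPE(atomic_t) - 2 = 30, so the off-list
 * flag lives in bit 30 while bits 0..29 keep counting in-use pages, and
 * masking with SWAP_USAGE_COUNTER_MASK recovers the plain usage count.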
 */
#define SWAP_USAGE_OFFLIST_BIT (1UL << (BITS_PER_TYPE(atomic_t) - 2))
#define SWAP_USAGE_COUNTER_MASK (~SWAP_USAGE_OFFLIST_BIT)
static long swap_usage_in_pages(struct swap_info_struct *si)
{
    return atomic_long_read(&si->inuse_pages) & SWAP_USAGE_COUNTER_MASK;
}

/* Reclaim the swap entry anyway if possible */
#define TTRS_ANYWAY	0x1
/*
 * Reclaim the swap entry if there are no more mappings of the
 * corresponding page
 */
#define TTRS_UNMAPPED	0x2
/* Reclaim the swap entry if swap is getting full */
#define TTRS_FULL	0x4
/* Reclaim directly, bypass the slot cache and don't touch device lock */
#define TTRS_DIRECT	0x8

static bool swap_is_has_cache(struct swap_info_struct *si,
                              unsigned long offset, int nr_pages)
{
    unsigned char *map = si->swap_map + offset;
    unsigned char *map_end = map + nr_pages;

    do {
        VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
        if (*map != SWAP_HAS_CACHE)
            return false;
    } while (++map < map_end);

    return true;
}

static bool swap_is_last_map(struct swap_info_struct *si,
                             unsigned long offset, int nr_pages,
                             bool *has_cache)
{
    unsigned char *map = si->swap_map + offset;
    unsigned char *map_end = map + nr_pages;
    unsigned char count = *map;

    if (swap_count(count) != 1)
        return false;

    while (++map < map_end) {
        if (*map != count)
            return false;
    }

    *has_cache = !!(count & SWAP_HAS_CACHE);
    return true;
}

/*
 * returns number of pages in the folio that backs the swap entry. If positive,
 * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
 * folio was associated with the swap entry.
 */
static int __try_to_reclaim_swap(struct swap_info_struct *si,
                                 unsigned long offset, unsigned long flags)
{
    swp_entry_t entry = swp_entry(si->type, offset);
    struct address_space *address_space = swap_address_space(entry);
    struct swap_cluster_info *ci;
    struct folio *folio;
    int ret, nr_pages;
    bool need_reclaim;

    folio = filemap_get_folio(address_space, swap_cache_index(entry));
    if (IS_ERR(folio))
        return 0;

    nr_pages = folio_nr_pages(folio);
    ret = -nr_pages;

    /*
     * When this function is called from scan_swap_map_slots(), it is
     * being run by vmscan.c while reclaiming folios, so a folio lock is
     * already held. We have to use trylock to avoid deadlock. This is a
     * special case; in normal operation use folio_free_swap() with an
     * explicit folio_lock().
     */
    if (!folio_trylock(folio))
        goto out;

    /* offset could point to the middle of a large folio */
    entry = folio->swap;
    offset = swp_offset(entry);

    need_reclaim = ((flags & TTRS_ANYWAY) ||
                    ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
                    ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)));
    if (!need_reclaim || !folio_swapcache_freeable(folio))
        goto out_unlock;

    /*
     * It's safe to delete the folio from the swap cache only if the
     * folio's swap_map is HAS_CACHE only, which means the slots have no
     * page table reference or pending writeback, and can't be allocated
     * to others.
     */
    ci = lock_cluster(si, offset);
    need_reclaim = swap_is_has_cache(si, offset, nr_pages);
    unlock_cluster(ci);
    if (!need_reclaim)
        goto out_unlock;

    if (!(flags & TTRS_DIRECT)) {
        /* Free through slot cache */
        delete_from_swap_cache(folio);
        folio_set_dirty(folio);
        ret = nr_pages;
        goto out_unlock;
    }

    xa_lock_irq(&address_space->i_pages);
    __delete_from_swap_cache(folio, entry, NULL);
    xa_unlock_irq(&address_space->i_pages);
    folio_ref_sub(folio, nr_pages);
    folio_set_dirty(folio);

    ci = lock_cluster(si, offset);
    swap_entry_range_free(si, ci, entry, nr_pages);
    unlock_cluster(ci);
    ret = nr_pages;
out_unlock:
    folio_unlock(folio);
out:
    folio_put(folio);
    return ret;
}

static inline struct swap_extent *first_se(struct swap_info_struct *sis)
{
    struct rb_node *rb = rb_first(&sis->swap_extent_root);
    return rb_entry(rb, struct swap_extent, rb_node);
}

static inline struct swap_extent *next_se(struct swap_extent *se)
{
    struct rb_node *rb = rb_next(&se->rb_node);
    return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL;
}

/*
 * swapon tells the device that all the old swap contents can be discarded,
 * to allow the swap device to optimize its wear-levelling.
 */
static int discard_swap(struct swap_info_struct *si)
{
    struct swap_extent *se;
    sector_t start_block;
    sector_t nr_blocks;
    int err = 0;

    /* Do not discard the swap header page! */
    se = first_se(si);
    start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
    nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
    if (nr_blocks) {
        err = blkdev_issue_discard(si->bdev, start_block,
                                   nr_blocks, GFP_KERNEL);
        if (err)
            return err;
        cond_resched();
    }

    for (se = next_se(se); se; se = next_se(se)) {
        start_block = se->start_block << (PAGE_SHIFT - 9);
        nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);

        err = blkdev_issue_discard(si->bdev, start_block,
                                   nr_blocks, GFP_KERNEL);
        if (err)
            break;

        cond_resched();
    }
    return err;	/* That will often be -EOPNOTSUPP */
}

static struct swap_extent *
offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
{
    struct swap_extent *se;
    struct rb_node *rb;

    rb = sis->swap_extent_root.rb_node;
    while (rb) {
        se = rb_entry(rb, struct swap_extent, rb_node);
        if (offset < se->start_page)
            rb = rb->rb_left;
        else if (offset >= se->start_page + se->nr_pages)
            rb = rb->rb_right;
        else
            return se;
    }
    /* It *must* be present */
    BUG();
}

sector_t swap_folio_sector(struct folio *folio)
{
    struct swap_info_struct *sis = swp_swap_info(folio->swap);
    struct swap_extent *se;
    sector_t sector;
    pgoff_t offset;

    offset = swp_offset(folio->swap);
    se = offset_to_swap_extent(sis, offset);
    sector = se->start_block + (offset - se->start_page);
    return sector << (PAGE_SHIFT - 9);
}

/*
 * swap allocation tells the device that a cluster of swap can now be
 * discarded, to allow the swap device to optimize its wear-levelling.
 */
static void discard_swap_cluster(struct swap_info_struct *si,
                                 pgoff_t start_page, pgoff_t nr_pages)
{
    struct swap_extent *se = offset_to_swap_extent(si, start_page);

    while (nr_pages) {
        pgoff_t offset = start_page - se->start_page;
        sector_t start_block = se->start_block + offset;
        sector_t nr_blocks = se->nr_pages - offset;

        if (nr_blocks > nr_pages)
            nr_blocks = nr_pages;
        start_page += nr_blocks;
        nr_pages -= nr_blocks;

        start_block <<= PAGE_SHIFT - 9;
        nr_blocks <<= PAGE_SHIFT - 9;
        if (blkdev_issue_discard(si->bdev, start_block,
                                 nr_blocks, GFP_NOIO))
            break;

        se = next_se(se);
    }
}

#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR
#define swap_entry_order(order)	(order)
#else
#define SWAPFILE_CLUSTER	256

/*
 * Define swap_entry_order() as constant to let the compiler optimize
 * out some code if !CONFIG_THP_SWAP
 */
#define swap_entry_order(order)	0
#endif
#define LATENCY_LIMIT		256

static inline bool cluster_is_empty(struct swap_cluster_info *info)
{
    return info->count == 0;
}

static inline bool cluster_is_discard(struct swap_cluster_info *info)
{
    return info->flags == CLUSTER_FLAG_DISCARD;
}

static inline bool cluster_is_usable(struct swap_cluster_info *ci, int order)
{
    if (unlikely(ci->flags > CLUSTER_FLAG_USABLE))
        return false;
    if (!order)
        return true;
    return cluster_is_empty(ci) || order == ci->order;
}

static inline unsigned int cluster_index(struct swap_info_struct *si,
                                         struct swap_cluster_info *ci)
{
    return ci - si->cluster_info;
}

static inline struct swap_cluster_info *
offset_to_cluster(struct swap_info_struct *si, unsigned long offset)
{
    return &si->cluster_info[offset / SWAPFILE_CLUSTER];
}

static inline unsigned int cluster_offset(struct swap_info_struct *si,
                                          struct swap_cluster_info *ci)
{
    return cluster_index(si, ci) * SWAPFILE_CLUSTER;
}

static inline struct swap_cluster_info *
lock_cluster(struct swap_info_struct *si, unsigned long offset)
{
    struct swap_cluster_info *ci;

    ci = offset_to_cluster(si, offset);
    spin_lock(&ci->lock);

    return ci;
}

static inline void unlock_cluster(struct swap_cluster_info *ci)
{
    spin_unlock(&ci->lock);
}

static void move_cluster(struct swap_info_struct *si,
                         struct swap_cluster_info *ci, struct list_head *list,
                         enum swap_cluster_flags new_flags)
{
    VM_WARN_ON(ci->flags == new_flags);

    BUILD_BUG_ON(1 << sizeof(ci->flags) * BITS_PER_BYTE < CLUSTER_FLAG_MAX);
    lockdep_assert_held(&ci->lock);

    spin_lock(&si->lock);
    if (ci->flags == CLUSTER_FLAG_NONE)
        list_add_tail(&ci->list, list);
    else
        list_move_tail(&ci->list, list);
    spin_unlock(&si->lock);

    if (ci->flags == CLUSTER_FLAG_FRAG)
        atomic_long_dec(&si->frag_cluster_nr[ci->order]);
    else if (new_flags == CLUSTER_FLAG_FRAG)
        atomic_long_inc(&si->frag_cluster_nr[ci->order]);
    ci->flags = new_flags;
}

/* Add a cluster to discard list and schedule it to do discard */
static void swap_cluster_schedule_discard(struct swap_info_struct *si,
                                          struct swap_cluster_info *ci)
{
    unsigned int idx = cluster_index(si, ci);
    /*
     * If scan_swap_map_slots() can't find a free cluster, it will check
     * si->swap_map directly. To make sure the discarding cluster isn't
     * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).
     * It will be cleared after discard.
     */
    memset(si->swap_map + idx * SWAPFILE_CLUSTER,
           SWAP_MAP_BAD, SWAPFILE_CLUSTER);
    VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE);
    move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD);
    schedule_work(&si->discard_work);
}

static void __free_cluster(struct swap_info_struct *si,
                           struct swap_cluster_info *ci)
{
    lockdep_assert_held(&ci->lock);
    move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE);
    ci->order = 0;
}

/*
 * Isolate and lock the first cluster that is not contended on a list,
 * clearing its flag before taking it off-list. Cluster flags must be in
 * sync with list status, so cluster updaters can always know the cluster
 * list status without touching the si lock.
 *
 * Note it's possible that all clusters on a list are contended so
 * this returns NULL for a non-empty list.
 */
static struct swap_cluster_info *isolate_lock_cluster(
        struct swap_info_struct *si, struct list_head *list)
{
    struct swap_cluster_info *ci, *ret = NULL;

    spin_lock(&si->lock);

    if (unlikely(!(si->flags & SWP_WRITEOK)))
        goto out;

    list_for_each_entry(ci, list, list) {
        if (!spin_trylock(&ci->lock))
            continue;

        /* We may only isolate and clear flags of following lists */
        VM_BUG_ON(!ci->flags);
        VM_BUG_ON(ci->flags > CLUSTER_FLAG_USABLE &&
                  ci->flags != CLUSTER_FLAG_FULL);

        list_del(&ci->list);
        ci->flags = CLUSTER_FLAG_NONE;
        ret = ci;
        break;
    }
out:
    spin_unlock(&si->lock);

    return ret;
}

/*
 * Actually do the discard. After a cluster discard is finished, the
 * cluster will be added to the free cluster list. Discard clusters are a
 * bit special as they don't participate in allocation or reclaim, so
 * clusters marked as CLUSTER_FLAG_DISCARD must remain off-list or on the
 * discard list.
 */
static bool swap_do_scheduled_discard(struct swap_info_struct *si)
{
    struct swap_cluster_info *ci;
    bool ret = false;
    unsigned int idx;

    spin_lock(&si->lock);
    while (!list_empty(&si->discard_clusters)) {
        ci = list_first_entry(&si->discard_clusters,
                              struct swap_cluster_info, list);
        /*
         * Delete the cluster from the list to prepare for discard, but
         * keep the CLUSTER_FLAG_DISCARD flag: there could be a
         * percpu_cluster pointing to it, or it could be run into by
         * relocate_cluster.
         */
        list_del(&ci->list);
        idx = cluster_index(si, ci);
        spin_unlock(&si->lock);
        discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
                             SWAPFILE_CLUSTER);

        spin_lock(&ci->lock);
        /*
         * Discard is done, clear its flags as it's off-list, then
         * return the cluster to the allocation list.
         */
        ci->flags = CLUSTER_FLAG_NONE;
        memset(si->swap_map + idx * SWAPFILE_CLUSTER,
               0, SWAPFILE_CLUSTER);
        __free_cluster(si, ci);
        spin_unlock(&ci->lock);
        ret = true;
        spin_lock(&si->lock);
    }
    spin_unlock(&si->lock);
    return ret;
}

static void swap_discard_work(struct work_struct *work)
{
    struct swap_info_struct *si;

    si = container_of(work, struct swap_info_struct, discard_work);

    swap_do_scheduled_discard(si);
}

static void swap_users_ref_free(struct percpu_ref *ref)
{
    struct swap_info_struct *si;

    si = container_of(ref, struct swap_info_struct, users);
    complete(&si->comp);
}

/*
 * Must be called after freeing if ci->count == 0, moves the cluster to the
 * free or discard list.
 */
static void free_cluster(struct swap_info_struct *si,
                         struct swap_cluster_info *ci)
{
    VM_BUG_ON(ci->count != 0);
    VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE);
    lockdep_assert_held(&ci->lock);

    /*
     * If the swap is discardable, prepare discard of the cluster
     * instead of freeing it immediately. The cluster will be freed
     * after discard.
     */
    if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
        (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
        swap_cluster_schedule_discard(si, ci);
        return;
    }

    __free_cluster(si, ci);
}

/*
 * Must be called after freeing if ci->count != 0, moves the cluster to the
 * nonfull list.
 */
static void partial_free_cluster(struct swap_info_struct *si,
                                 struct swap_cluster_info *ci)
{
    VM_BUG_ON(!ci->count || ci->count == SWAPFILE_CLUSTER);
    lockdep_assert_held(&ci->lock);

    if (ci->flags != CLUSTER_FLAG_NONFULL)
        move_cluster(si, ci, &si->nonfull_clusters[ci->order],
                     CLUSTER_FLAG_NONFULL);
}

/*
 * Must be called after allocation, moves the cluster to the full or frag
 * list. Note: allocation doesn't acquire the si lock, and may drop the ci
 * lock for reclaim, so the cluster could be anywhere when called.
 */
static void relocate_cluster(struct swap_info_struct *si,
                             struct swap_cluster_info *ci)
{
    lockdep_assert_held(&ci->lock);

    /* Discard cluster must remain off-list or on discard list */
    if (cluster_is_discard(ci))
        return;

    if (!ci->count) {
        free_cluster(si, ci);
    } else if (ci->count != SWAPFILE_CLUSTER) {
        if (ci->flags != CLUSTER_FLAG_FRAG)
            move_cluster(si, ci, &si->frag_clusters[ci->order],
                         CLUSTER_FLAG_FRAG);
    } else {
        if (ci->flags != CLUSTER_FLAG_FULL)
            move_cluster(si, ci, &si->full_clusters,
                         CLUSTER_FLAG_FULL);
    }
}

/*
 * The cluster corresponding to page_nr will be used. The cluster will not be
 * added to the free cluster list and its usage counter will be increased by 1.
 * Only used for initialization.
 */
static void inc_cluster_info_page(struct swap_info_struct *si,
                                  struct swap_cluster_info *cluster_info,
                                  unsigned long page_nr)
{
    unsigned long idx = page_nr / SWAPFILE_CLUSTER;
    struct swap_cluster_info *ci;

    ci = cluster_info + idx;
    ci->count++;

    VM_BUG_ON(ci->count > SWAPFILE_CLUSTER);
    VM_BUG_ON(ci->flags);
}

static bool cluster_reclaim_range(struct swap_info_struct *si,
                                  struct swap_cluster_info *ci,
                                  unsigned long start, unsigned long end)
{
    unsigned char *map = si->swap_map;
    unsigned long offset = start;
    int nr_reclaim;

    spin_unlock(&ci->lock);
    do {
        switch (READ_ONCE(map[offset])) {
        case 0:
            offset++;
            break;
        case SWAP_HAS_CACHE:
            nr_reclaim = __try_to_reclaim_swap(si, offset,
                                               TTRS_ANYWAY | TTRS_DIRECT);
            if (nr_reclaim > 0)
                offset += nr_reclaim;
            else
                goto out;
            break;
        default:
            goto out;
        }
    } while (offset < end);
out:
    spin_lock(&ci->lock);
    /*
     * Recheck the range no matter whether reclaim succeeded or not: a
     * slot could have been freed while we were not holding the lock.
     */
    for (offset = start; offset < end; offset++)
        if (READ_ONCE(map[offset]))
            return false;

    return true;
}

static bool cluster_scan_range(struct swap_info_struct *si,
                               struct swap_cluster_info *ci,
                               unsigned long start, unsigned int nr_pages,
                               bool *need_reclaim)
{
    unsigned long offset, end = start + nr_pages;
    unsigned char *map = si->swap_map;

    for (offset = start; offset < end; offset++) {
        switch (READ_ONCE(map[offset])) {
        case 0:
            continue;
        case SWAP_HAS_CACHE:
            if (!vm_swap_full())
                return false;
            *need_reclaim = true;
            continue;
        default:
            return false;
        }
    }

    return true;
}

static bool cluster_alloc_range(struct swap_info_struct *si,
                                struct swap_cluster_info *ci,
                                unsigned int start, unsigned char usage,
                                unsigned int order)
{
    unsigned int nr_pages = 1 << order;

    lockdep_assert_held(&ci->lock);

    if (!(si->flags & SWP_WRITEOK))
        return false;

    /*
     * The first allocation in a cluster makes the
     * cluster exclusive to this order
     */
    if (cluster_is_empty(ci))
        ci->order = order;

    memset(si->swap_map + start, usage, nr_pages);
    swap_range_alloc(si, nr_pages);
    ci->count += nr_pages;

    return true;
}

/* Try to use a new cluster for the current CPU and allocate from it. */
static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
                                            struct swap_cluster_info *ci,
                                            unsigned long offset,
                                            unsigned int order,
                                            unsigned char usage)
{
    unsigned int next = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID;
    unsigned long start = ALIGN_DOWN(offset, SWAPFILE_CLUSTER);
    unsigned long end = min(start + SWAPFILE_CLUSTER, si->max);
    unsigned int nr_pages = 1 << order;
    bool need_reclaim, ret;

    lockdep_assert_held(&ci->lock);

    if (end < nr_pages || ci->count + nr_pages > SWAPFILE_CLUSTER)
        goto out;

    for (end -= nr_pages; offset <= end; offset += nr_pages) {
        need_reclaim = false;
        if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim))
            continue;
        if (need_reclaim) {
            ret = cluster_reclaim_range(si, ci, offset,
                                        offset + nr_pages);
            /*
             * Reclaim drops ci->lock and the cluster could be used
             * by another order. We don't check the flag here, as an
             * off-list cluster has no flag set, and a change of list
             * won't cause fragmentation.
             */
            if (!cluster_is_usable(ci, order))
                goto out;
            if (cluster_is_empty(ci))
                offset = start;
            /* Reclaim failed but cluster is usable, try next */
            if (!ret)
                continue;
        }
        if (!cluster_alloc_range(si, ci, offset, usage, order))
            break;
        found = offset;
        offset += nr_pages;
        if (ci->count < SWAPFILE_CLUSTER && offset <= end)
            next = offset;
        break;
    }
out:
    relocate_cluster(si, ci);
    unlock_cluster(ci);
    if (si->flags & SWP_SOLIDSTATE)
        __this_cpu_write(si->percpu_cluster->next[order], next);
    else
        si->global_cluster->next[order] = next;
    return found;
}

/*
 * Reclaim cached-only slots from full clusters; scans one cluster, or
 * roughly all in-use clusters when force is set.
 */
static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
{
    long to_scan = 1;
    unsigned long offset, end;
    struct swap_cluster_info *ci;
    unsigned char *map = si->swap_map;
    int nr_reclaim;

    if (force)
        to_scan = swap_usage_in_pages(si) / SWAPFILE_CLUSTER;

    while ((ci = isolate_lock_cluster(si, &si->full_clusters))) {
        offset = cluster_offset(si, ci);
        end = min(si->max, offset + SWAPFILE_CLUSTER);
        to_scan--;

        while (offset < end) {
            if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) {
                spin_unlock(&ci->lock);
                nr_reclaim = __try_to_reclaim_swap(si, offset,
                                                   TTRS_ANYWAY | TTRS_DIRECT);
                spin_lock(&ci->lock);
                if (nr_reclaim) {
                    offset += abs(nr_reclaim);
                    continue;
                }
            }
            offset++;
        }

        unlock_cluster(ci);
        if (to_scan <= 0)
            break;
    }
}

static void swap_reclaim_work(struct work_struct *work)
{
    struct swap_info_struct *si;

    si = container_of(work, struct swap_info_struct, reclaim_work);

    swap_reclaim_full_clusters(si, true);
}

/*
 * Try to get swap entries with the specified order from the current CPU's
 * swap entry pool (a cluster). This might involve allocating a new cluster
 * for the current CPU too.
 */
static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si,
                                              int order, unsigned char usage)
{
    struct swap_cluster_info *ci;
    unsigned int offset, found = 0;

    if (si->flags & SWP_SOLIDSTATE) {
        /* Fast path using per CPU cluster */
        local_lock(&si->percpu_cluster->lock);
        offset = __this_cpu_read(si->percpu_cluster->next[order]);
    } else {
        /* Serialize HDD SWAP allocation for each device. */
        spin_lock(&si->global_cluster_lock);
        offset = si->global_cluster->next[order];
    }

    if (offset) {
        ci = lock_cluster(si, offset);
        /* Cluster could have been used by another order */
        if (cluster_is_usable(ci, order)) {
            if (cluster_is_empty(ci))
                offset = cluster_offset(si, ci);
            found = alloc_swap_scan_cluster(si, ci, offset,
                                            order, usage);
        } else {
            unlock_cluster(ci);
        }
        if (found)
            goto done;
    }

new_cluster:
    ci = isolate_lock_cluster(si, &si->free_clusters);
    if (ci) {
        found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
                                        order, usage);
        if (found)
            goto done;
    }

    /* Try reclaim from full clusters if free clusters list is drained */
    if (vm_swap_full())
        swap_reclaim_full_clusters(si, false);

    if (order < PMD_ORDER) {
        unsigned int frags = 0, frags_existing;

        while ((ci = isolate_lock_cluster(si, &si->nonfull_clusters[order]))) {
            found = alloc_swap_scan_cluster(si, ci,
                                            cluster_offset(si, ci),
                                            order, usage);
            if (found)
                goto done;
            /* Clusters failed to allocate are moved to frag_clusters */
            frags++;
        }

        frags_existing = atomic_long_read(&si->frag_cluster_nr[order]);
        while (frags < frags_existing &&
               (ci = isolate_lock_cluster(si, &si->frag_clusters[order]))) {
            atomic_long_dec(&si->frag_cluster_nr[order]);
            /*
             * Rotate the frag list to iterate; these clusters were all
             * failing high order allocation or were moved here due to
             * per-CPU usage, but they could contain newly released
             * reclaimable (e.g. lazy-freed swap cache) slots.
             */
            found = alloc_swap_scan_cluster(si, ci,
                                            cluster_offset(si, ci),
                                            order, usage);
            if (found)
                goto done;
            frags++;
        }
    }

    /*
     * We don't have a free cluster, but some clusters are still being
     * discarded: do the discard now and reclaim them, then reread
     * cluster_next_cpu since we dropped si->lock.
     */
    if ((si->flags & SWP_PAGE_DISCARD) && swap_do_scheduled_discard(si))
        goto new_cluster;

    if (order)
        goto done;

    /* Order 0 stealing from higher order */
    for (int o = 1; o < SWAP_NR_ORDERS; o++) {
        /*
         * Clusters here have at least one usable slot and can't fail
         * order 0 allocation, but reclaim may drop si->lock and race
         * with another user.
         */
        while ((ci = isolate_lock_cluster(si, &si->frag_clusters[o]))) {
            atomic_long_dec(&si->frag_cluster_nr[o]);
            found = alloc_swap_scan_cluster(si, ci,
                                            cluster_offset(si, ci),
                                            0, usage);
            if (found)
                goto done;
        }

        while ((ci = isolate_lock_cluster(si, &si->nonfull_clusters[o]))) {
            found = alloc_swap_scan_cluster(si, ci,
                                            cluster_offset(si, ci),
                                            0, usage);
            if (found)
                goto done;
        }
    }
done:
    if (si->flags & SWP_SOLIDSTATE)
        local_unlock(&si->percpu_cluster->lock);
    else
        spin_unlock(&si->global_cluster_lock);
    return found;
}

/* SWAP_USAGE_OFFLIST_BIT can only be set by this helper. */
static void del_from_avail_list(struct swap_info_struct *si, bool swapoff)
{
    int nid;
    unsigned long pages;

    spin_lock(&swap_avail_lock);

    if (swapoff) {
        /*
         * Forcefully remove it. Clear the SWP_WRITEOK flags for
         * swapoff here so it's synchronized by both si->lock and
         * swap_avail_lock, to ensure the result can be seen by
         * add_to_avail_list.
         */
        lockdep_assert_held(&si->lock);
        si->flags &= ~SWP_WRITEOK;
        atomic_long_or(SWAP_USAGE_OFFLIST_BIT, &si->inuse_pages);
    } else {
        /*
         * If not called by swapoff, take it off-list only if it's
         * full and SWAP_USAGE_OFFLIST_BIT is not set (strictly
         * si->inuse_pages == pages). Any concurrent slot freeing,
         * or the device already having been removed from the plist
         * by someone else, will make this return false.
         */
        pages = si->pages;
        if (!atomic_long_try_cmpxchg(&si->inuse_pages, &pages,
                                     pages | SWAP_USAGE_OFFLIST_BIT))
            goto skip;
    }

    for_each_node(nid)
        plist_del(&si->avail_lists[nid], &swap_avail_heads[nid]);

skip:
    spin_unlock(&swap_avail_lock);
}

/* SWAP_USAGE_OFFLIST_BIT can only be cleared by this helper. */
static void add_to_avail_list(struct swap_info_struct *si, bool swapon)
{
    int nid;
    long val;
    unsigned long pages;

    spin_lock(&swap_avail_lock);

    /* Corresponding to SWP_WRITEOK clearing in del_from_avail_list */
    if (swapon) {
        lockdep_assert_held(&si->lock);
        si->flags |= SWP_WRITEOK;
    } else {
        if (!(READ_ONCE(si->flags) & SWP_WRITEOK))
            goto skip;
    }

    if (!(atomic_long_read(&si->inuse_pages) & SWAP_USAGE_OFFLIST_BIT))
        goto skip;

    val = atomic_long_fetch_and_relaxed(~SWAP_USAGE_OFFLIST_BIT,
                                        &si->inuse_pages);

    /*
     * When the device is full and on the plist, only one updater will
     * see (inuse_pages == si->pages) and will call del_from_avail_list.
     * If that updater happens to be here, just skip adding.
     */
    pages = si->pages;
    if (val == pages) {
        /* Just like the cmpxchg in del_from_avail_list */
        if (atomic_long_try_cmpxchg(&si->inuse_pages, &pages,
                                    pages | SWAP_USAGE_OFFLIST_BIT))
            goto skip;
    }

    for_each_node(nid)
        plist_add(&si->avail_lists[nid], &swap_avail_heads[nid]);

skip:
    spin_unlock(&swap_avail_lock);
}

/*
 * swap_usage_add / swap_usage_sub of each slot are serialized by ci->lock
 * within each cluster, so the total contribution to the global counter
 * should always be positive and cannot exceed the total number of usable
 * slots.
/*
 * swap_usage_add / swap_usage_sub of each slot are serialized by ci->lock
 * within each cluster, so the total contribution to the global counter should
 * always be positive and cannot exceed the total number of usable slots.
 */
static bool swap_usage_add(struct swap_info_struct *si, unsigned int nr_entries)
{
	long val = atomic_long_add_return_relaxed(nr_entries, &si->inuse_pages);

	/*
	 * If device is full, and SWAP_USAGE_OFFLIST_BIT is not set,
	 * remove it from the plist.
	 */
	if (unlikely(val == si->pages)) {
		del_from_avail_list(si, false);
		return true;
	}

	return false;
}

static void swap_usage_sub(struct swap_info_struct *si, unsigned int nr_entries)
{
	long val = atomic_long_sub_return_relaxed(nr_entries, &si->inuse_pages);

	/*
	 * If device is not full, and SWAP_USAGE_OFFLIST_BIT is set,
	 * add it back to the plist.
	 */
	if (unlikely(val & SWAP_USAGE_OFFLIST_BIT))
		add_to_avail_list(si, false);
}

static void swap_range_alloc(struct swap_info_struct *si,
			     unsigned int nr_entries)
{
	if (swap_usage_add(si, nr_entries)) {
		if (vm_swap_full())
			schedule_work(&si->reclaim_work);
	}
}

static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
			    unsigned int nr_entries)
{
	unsigned long begin = offset;
	unsigned long end = offset + nr_entries - 1;
	void (*swap_slot_free_notify)(struct block_device *, unsigned long);
	unsigned int i;

	/*
	 * Use atomic clear_bit operations only on zeromap instead of non-atomic
	 * bitmap_clear to prevent adjacent bits corruption due to simultaneous
	 * writes.
	 */
	for (i = 0; i < nr_entries; i++) {
		clear_bit(offset + i, si->zeromap);
		zswap_invalidate(swp_entry(si->type, offset + i));
	}

	if (si->flags & SWP_BLKDEV)
		swap_slot_free_notify =
			si->bdev->bd_disk->fops->swap_slot_free_notify;
	else
		swap_slot_free_notify = NULL;
	while (offset <= end) {
		arch_swap_invalidate_page(si->type, offset);
		if (swap_slot_free_notify)
			swap_slot_free_notify(si->bdev, offset);
		offset++;
	}
	clear_shadow_from_swap_cache(si->type, begin, end);

	/*
	 * Make sure that try_to_unuse() observes si->inuse_pages reaching 0
	 * only after the above cleanups are done.
	 */
	smp_wmb();
	atomic_long_add(nr_entries, &nr_swap_pages);
	swap_usage_sub(si, nr_entries);
}

static int cluster_alloc_swap(struct swap_info_struct *si,
			      unsigned char usage, int nr,
			      swp_entry_t slots[], int order)
{
	int n_ret = 0;

	while (n_ret < nr) {
		unsigned long offset = cluster_alloc_swap_entry(si, order, usage);

		if (!offset)
			break;
		slots[n_ret++] = swp_entry(si->type, offset);
	}

	return n_ret;
}

static int scan_swap_map_slots(struct swap_info_struct *si,
			       unsigned char usage, int nr,
			       swp_entry_t slots[], int order)
{
	unsigned int nr_pages = 1 << order;

	/*
	 * We try to cluster swap pages by allocating them sequentially
	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
	 * way, however, we resort to first-free allocation, starting
	 * a new cluster.  This prevents us from scattering swap pages
	 * all over the entire swap partition, so that we reduce
	 * overall disk seek times between swap pages.  -- sct
	 * But we do now try to find an empty cluster.  -Andrea
	 * And we let swap pages go all over an SSD partition.  Hugh
	 */
	if (order > 0) {
		/*
		 * Should not even be attempting large allocations when huge
		 * page swap is disabled. Warn and fail the allocation.
		 */
		if (!IS_ENABLED(CONFIG_THP_SWAP) ||
		    nr_pages > SWAPFILE_CLUSTER) {
			VM_WARN_ON_ONCE(1);
			return 0;
		}

		/*
		 * The swapfile is not a block device, so we are unable
		 * to allocate large entries.
		 */
		if (!(si->flags & SWP_BLKDEV))
			return 0;
	}

	return cluster_alloc_swap(si, usage, nr, slots, order);
}

static bool get_swap_device_info(struct swap_info_struct *si)
{
	if (!percpu_ref_tryget_live(&si->users))
		return false;
	/*
	 * Guarantee the si->users are checked before accessing other
	 * fields of swap_info_struct, and si->flags (SWP_WRITEOK) is
	 * up to date.
	 *
	 * Paired with the spin_unlock() after setup_swap_info() in
	 * enable_swap_info(), and smp_wmb() in swapoff.
	 */
	smp_rmb();
	return true;
}

int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
{
	int order = swap_entry_order(entry_order);
	unsigned long size = 1 << order;
	struct swap_info_struct *si, *next;
	long avail_pgs;
	int n_ret = 0;
	int node;

	spin_lock(&swap_avail_lock);

	avail_pgs = atomic_long_read(&nr_swap_pages) / size;
	if (avail_pgs <= 0) {
		spin_unlock(&swap_avail_lock);
		goto noswap;
	}

	n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs);

	atomic_long_sub(n_goal * size, &nr_swap_pages);

start_over:
	node = numa_node_id();
	plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
				  avail_lists[node]) {
		/* requeue si to after same-priority siblings */
		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
		spin_unlock(&swap_avail_lock);
		if (get_swap_device_info(si)) {
			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
						    n_goal, swp_entries, order);
			put_swap_device(si);
			if (n_ret || size > 1)
				goto check_out;
		}

		spin_lock(&swap_avail_lock);
		/*
		 * if we got here, it's likely that si was almost full before,
		 * and since scan_swap_map_slots() can drop the si->lock,
		 * multiple callers probably all tried to get a page from the
		 * same si and it filled up before we could get one; or, the si
		 * filled up between us dropping swap_avail_lock and taking
		 * si->lock. Since we dropped the swap_avail_lock, the
		 * swap_avail_head list may have been modified; so if next is
		 * still in the swap_avail_head list then try it, otherwise
		 * start over if we have not gotten any slots.
		 */
		if (plist_node_empty(&next->avail_lists[node]))
			goto start_over;
	}

	spin_unlock(&swap_avail_lock);

check_out:
	if (n_ret < n_goal)
		atomic_long_add((long)(n_goal - n_ret) * size,
				&nr_swap_pages);
noswap:
	return n_ret;
}

static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *si;
	unsigned long offset;

	if (!entry.val)
		goto out;
	si = swp_swap_info(entry);
	if (!si)
		goto bad_nofile;
	if (data_race(!(si->flags & SWP_USED)))
		goto bad_device;
	offset = swp_offset(entry);
	if (offset >= si->max)
		goto bad_offset;
	if (data_race(!si->swap_map[swp_offset(entry)]))
		goto bad_free;
	return si;

bad_free:
	pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
	goto out;
bad_offset:
	pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
	goto out;
bad_device:
	pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val);
	goto out;
bad_nofile:
	pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
out:
	return NULL;
}

static unsigned char __swap_entry_free_locked(struct swap_info_struct *si,
					      unsigned long offset,
					      unsigned char usage)
{
	unsigned char count;
	unsigned char has_cache;

	count = si->swap_map[offset];

	has_cache = count & SWAP_HAS_CACHE;
	count &= ~SWAP_HAS_CACHE;

	if (usage == SWAP_HAS_CACHE) {
		VM_BUG_ON(!has_cache);
		has_cache = 0;
	} else if (count == SWAP_MAP_SHMEM) {
		/*
		 * Or we could insist on shmem.c using a special
		 * swap_shmem_free() and free_shmem_swap_and_cache()...
		 */
		count = 0;
	} else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
		if (count == COUNT_CONTINUED) {
			if (swap_count_continued(si, offset, count))
				count = SWAP_MAP_MAX | COUNT_CONTINUED;
			else
				count = SWAP_MAP_MAX;
		} else
			count--;
	}

	usage = count | has_cache;
	if (usage)
		WRITE_ONCE(si->swap_map[offset], usage);
	else
		WRITE_ONCE(si->swap_map[offset], SWAP_HAS_CACHE);

	return usage;
}
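/*
 * Illustration for __swap_entry_free_locked() above (hypothetical helpers,
 * not part of this file): each swap_map byte carries SWAP_HAS_CACHE on top
 * of a reference count, and the count itself may saturate at SWAP_MAP_MAX
 * with COUNT_CONTINUED signalling overflow digits held in continuation
 * pages. The second helper mirrors this file's own swap_count():
 */
static inline bool example_slot_has_cache(unsigned char ent)
{
	return ent & SWAP_HAS_CACHE;
}

static inline unsigned char example_slot_count(unsigned char ent)
{
	return ent & ~SWAP_HAS_CACHE;	/* may still include COUNT_CONTINUED */
}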
/*
 * When we get a swap entry, if there aren't some other ways to
 * prevent swapoff, such as the folio in swap cache is locked, RCU
 * reader side is locked, etc., the swap entry may become invalid
 * because of swapoff. Then, we need to enclose all swap related
 * functions with get_swap_device() and put_swap_device(), unless the
 * swap functions call get/put_swap_device() by themselves.
 *
 * RCU reader side lock (including any spinlock) is sufficient to
 * prevent swapoff, because synchronize_rcu() is called in swapoff()
 * before freeing data structures.
 *
 * Check whether swap entry is valid in the swap device. If so,
 * return pointer to swap_info_struct, and keep the swap entry valid
 * by preventing the swap device from being swapped off, until
 * put_swap_device() is called. Otherwise return NULL.
 *
 * Notice that swapoff or swapoff+swapon can still happen before the
 * percpu_ref_tryget_live() in get_swap_device() or after the
 * percpu_ref_put() in put_swap_device() if there isn't any other way
 * to prevent swapoff. The caller must be prepared for that. For
 * example, the following situation is possible.
 *
 *   CPU1				CPU2
 *   do_swap_page()
 *     ...				swapoff+swapon
 *     __read_swap_cache_async()
 *       swapcache_prepare()
 *         __swap_duplicate()
 *           // check swap_map
 *     // verify PTE not changed
 *
 * In __swap_duplicate(), the swap_map needs to be checked before
 * changing partly because the specified swap entry may be for another
 * swap device which has been swapped off. And in do_swap_page(), after
 * the page is read from the swap device, the PTE is verified not
 * changed with the page table locked to check whether the swap device
 * has been swapped off or swapped off and on again.
 */
struct swap_info_struct *get_swap_device(swp_entry_t entry)
{
	struct swap_info_struct *si;
	unsigned long offset;

	if (!entry.val)
		goto out;
	si = swp_swap_info(entry);
	if (!si)
		goto bad_nofile;
	if (!get_swap_device_info(si))
		goto out;
	offset = swp_offset(entry);
	if (offset >= si->max)
		goto put_out;

	return si;
bad_nofile:
	pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
out:
	return NULL;
put_out:
	pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
	percpu_ref_put(&si->users);
	return NULL;
}

static unsigned char __swap_entry_free(struct swap_info_struct *si,
				       swp_entry_t entry)
{
	struct swap_cluster_info *ci;
	unsigned long offset = swp_offset(entry);
	unsigned char usage;

	ci = lock_cluster(si, offset);
	usage = __swap_entry_free_locked(si, offset, 1);
	if (!usage)
		swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1);
	unlock_cluster(ci);

	return usage;
}

static bool __swap_entries_free(struct swap_info_struct *si,
				swp_entry_t entry, int nr)
{
	unsigned long offset = swp_offset(entry);
	unsigned int type = swp_type(entry);
	struct swap_cluster_info *ci;
	bool has_cache = false;
	unsigned char count;
	int i;

	if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1)
		goto fallback;
	/* cross into another cluster */
	if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
		goto fallback;

	ci = lock_cluster(si, offset);
	if (!swap_is_last_map(si, offset, nr, &has_cache)) {
		unlock_cluster(ci);
		goto fallback;
	}
	for (i = 0; i < nr; i++)
		WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
	if (!has_cache)
		swap_entry_range_free(si, ci, entry, nr);
	unlock_cluster(ci);

	return has_cache;

fallback:
	for (i = 0; i < nr; i++) {
		if (data_race(si->swap_map[offset + i])) {
			count = __swap_entry_free(si, swp_entry(type, offset + i));
			if (count == SWAP_HAS_CACHE)
				has_cache = true;
		} else {
			WARN_ON_ONCE(1);
		}
	}
	return has_cache;
}

/*
 * Drop the last HAS_CACHE flag of swap entries; callers have to
 * ensure all entries belong to the same cgroup.
 */
static void swap_entry_range_free(struct swap_info_struct *si,
				  struct swap_cluster_info *ci,
				  swp_entry_t entry, unsigned int nr_pages)
{
	unsigned long offset = swp_offset(entry);
	unsigned char *map = si->swap_map + offset;
	unsigned char *map_end = map + nr_pages;

	/* It should never free entries across different clusters */
	VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1));
	VM_BUG_ON(cluster_is_empty(ci));
	VM_BUG_ON(ci->count < nr_pages);

	ci->count -= nr_pages;
	do {
		VM_BUG_ON(*map != SWAP_HAS_CACHE);
		*map = 0;
	} while (++map < map_end);

	mem_cgroup_uncharge_swap(entry, nr_pages);
	swap_range_free(si, offset, nr_pages);

	if (!ci->count)
		free_cluster(si, ci);
	else
		partial_free_cluster(si, ci);
}

static void cluster_swap_free_nr(struct swap_info_struct *si,
				 unsigned long offset, int nr_pages,
				 unsigned char usage)
{
	struct swap_cluster_info *ci;
	unsigned long end = offset + nr_pages;

	ci = lock_cluster(si, offset);
	do {
		if (!__swap_entry_free_locked(si, offset, usage))
			swap_entry_range_free(si, ci,
					      swp_entry(si->type, offset), 1);
	} while (++offset < end);
	unlock_cluster(ci);
}
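/*
 * Example usage of get_swap_device()/put_swap_device() above (hypothetical
 * caller, not part of this file): a reader holding no other pin against
 * swapoff would bracket every dereference of the entry like this:
 */
static inline int example_peek_swap_count(swp_entry_t entry)
{
	struct swap_info_struct *si;
	int count;

	si = get_swap_device(entry);	/* pins si->users, blocks swapoff */
	if (!si)
		return 0;		/* raced with swapoff: entry is stale */
	count = swap_count(READ_ONCE(si->swap_map[swp_offset(entry)]));
	put_swap_device(si);		/* drop the percpu reference */
	return count;
}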
/*
 * Caller has made sure that the swap device corresponding to entry
 * is still around or has not been recycled.
 */
void swap_free_nr(swp_entry_t entry, int nr_pages)
{
	int nr;
	struct swap_info_struct *sis;
	unsigned long offset = swp_offset(entry);

	sis = _swap_info_get(entry);
	if (!sis)
		return;

	while (nr_pages) {
		nr = min_t(int, nr_pages,
			   SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
		cluster_swap_free_nr(sis, offset, nr, 1);
		offset += nr;
		nr_pages -= nr;
	}
}

/*
 * Called after dropping swapcache to decrease refcnt to swap entries.
 */
void put_swap_folio(struct folio *folio, swp_entry_t entry)
{
	unsigned long offset = swp_offset(entry);
	struct swap_cluster_info *ci;
	struct swap_info_struct *si;
	int size = 1 << swap_entry_order(folio_order(folio));

	si = _swap_info_get(entry);
	if (!si)
		return;

	ci = lock_cluster(si, offset);
	if (swap_is_has_cache(si, offset, size))
		swap_entry_range_free(si, ci, entry, size);
	else {
		for (int i = 0; i < size; i++, entry.val++) {
			if (!__swap_entry_free_locked(si, offset + i,
						      SWAP_HAS_CACHE))
				swap_entry_range_free(si, ci, entry, 1);
		}
	}
	unlock_cluster(ci);
}

void swapcache_free_entries(swp_entry_t *entries, int n)
{
	int i;
	struct swap_cluster_info *ci;
	struct swap_info_struct *si = NULL;

	if (n <= 0)
		return;

	for (i = 0; i < n; ++i) {
		si = _swap_info_get(entries[i]);
		if (si) {
			ci = lock_cluster(si, swp_offset(entries[i]));
			swap_entry_range_free(si, ci, entries[i], 1);
			unlock_cluster(ci);
		}
	}
}

int __swap_count(swp_entry_t entry)
{
	struct swap_info_struct *si = swp_swap_info(entry);
	pgoff_t offset = swp_offset(entry);

	return swap_count(si->swap_map[offset]);
}

/*
 * How many references to @entry are currently swapped out?
 * This does not give an exact answer when swap count is continued,
 * but does include the high COUNT_CONTINUED flag to allow for that.
 */
int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
{
	pgoff_t offset = swp_offset(entry);
	struct swap_cluster_info *ci;
	int count;

	ci = lock_cluster(si, offset);
	count = swap_count(si->swap_map[offset]);
	unlock_cluster(ci);
	return count;
}
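/*
 * Worked example for the chunking loop in swap_free_nr() above (illustrative
 * numbers, assuming SWAPFILE_CLUSTER == 512): freeing nr_pages == 700 from
 * offset == 300 is split at cluster boundaries into min(700, 512 - 300 % 512)
 * = 212 entries, then the remaining 488 from offset 512, so that each call to
 * cluster_swap_free_nr() stays under a single cluster lock.
 */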
/*
 * How many references to @entry are currently swapped out?
 * This considers COUNT_CONTINUED so it returns the exact answer.
 */
int swp_swapcount(swp_entry_t entry)
{
	int count, tmp_count, n;
	struct swap_info_struct *si;
	struct swap_cluster_info *ci;
	struct page *page;
	pgoff_t offset;
	unsigned char *map;

	si = _swap_info_get(entry);
	if (!si)
		return 0;

	offset = swp_offset(entry);

	ci = lock_cluster(si, offset);

	count = swap_count(si->swap_map[offset]);
	if (!(count & COUNT_CONTINUED))
		goto out;

	count &= ~COUNT_CONTINUED;
	n = SWAP_MAP_MAX + 1;

	page = vmalloc_to_page(si->swap_map + offset);
	offset &= ~PAGE_MASK;
	VM_BUG_ON(page_private(page) != SWP_CONTINUED);

	/*
	 * Each continuation page holds one more "digit" of the count:
	 * the first digit is weighted by SWAP_MAP_MAX + 1, and every
	 * further digit multiplies the weight by SWAP_CONT_MAX + 1.
	 */
	do {
		page = list_next_entry(page, lru);
		map = kmap_local_page(page);
		tmp_count = map[offset];
		kunmap_local(map);

		count += (tmp_count & ~COUNT_CONTINUED) * n;
		n *= (SWAP_CONT_MAX + 1);
	} while (tmp_count & COUNT_CONTINUED);
out:
	unlock_cluster(ci);
	return count;
}

static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
					 swp_entry_t entry, int order)
{
	struct swap_cluster_info *ci;
	unsigned char *map = si->swap_map;
	unsigned int nr_pages = 1 << order;
	unsigned long roffset = swp_offset(entry);
	unsigned long offset = round_down(roffset, nr_pages);
	int i;
	bool ret = false;

	ci = lock_cluster(si, offset);
	if (nr_pages == 1) {
		if (swap_count(map[roffset]))
			ret = true;
		goto unlock_out;
	}
	for (i = 0; i < nr_pages; i++) {
		if (swap_count(map[offset + i])) {
			ret = true;
			break;
		}
	}
unlock_out:
	unlock_cluster(ci);
	return ret;
}

static bool folio_swapped(struct folio *folio)
{
	swp_entry_t entry = folio->swap;
	struct swap_info_struct *si = _swap_info_get(entry);

	if (!si)
		return false;

	if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio)))
		return swap_swapcount(si, entry) != 0;

	return swap_page_trans_huge_swapped(si, entry, folio_order(folio));
}

static bool folio_swapcache_freeable(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

	if (!folio_test_swapcache(folio))
		return false;
	if (folio_test_writeback(folio))
		return false;

	/*
	 * Once hibernation has begun to create its image of memory,
	 * there's a danger that one of the calls to folio_free_swap()
	 * - most probably a call from __try_to_reclaim_swap() while
	 * hibernation is allocating its own swap pages for the image,
	 * but conceivably even a call from memory reclaim - will free
	 * the swap from a folio which has already been recorded in the
	 * image as a clean swapcache folio, and then reuse its swap for
	 * another page of the image. On waking from hibernation, the
	 * original folio might be freed under memory pressure, then
	 * later read back in from swap, now with the wrong data.
	 *
	 * Hibernation suspends storage while it is writing the image
	 * to disk so check that here.
	 */
	if (pm_suspended_storage())
		return false;

	return true;
}

/**
 * folio_free_swap() - Free the swap space used for this folio.
 * @folio: The folio to remove.
 *
 * If swap is getting full, or if there are no more mappings of this folio,
 * then call folio_free_swap to free its swap space.
 *
 * Return: true if we were able to release the swap space.
 */
bool folio_free_swap(struct folio *folio)
{
	if (!folio_swapcache_freeable(folio))
		return false;
	if (folio_swapped(folio))
		return false;

	delete_from_swap_cache(folio);
	folio_set_dirty(folio);
	return true;
}

/**
 * free_swap_and_cache_nr() - Release reference on range of swap entries and
 *                            reclaim their cache if no more references remain.
 * @entry: First entry of range.
 * @nr: Number of entries in range.
 *
 * For each swap entry in the contiguous range, release a reference. If any
 * swap entries become free, try to reclaim their underlying folios, if
 * present.
 * The offset range is defined by [entry.offset, entry.offset + nr).
 */
void free_swap_and_cache_nr(swp_entry_t entry, int nr)
{
	const unsigned long start_offset = swp_offset(entry);
	const unsigned long end_offset = start_offset + nr;
	struct swap_info_struct *si;
	bool any_only_cache = false;
	unsigned long offset;

	if (non_swap_entry(entry))
		return;

	si = get_swap_device(entry);
	if (!si)
		return;

	if (WARN_ON(end_offset > si->max))
		goto out;

	/*
	 * First free all entries in the range.
	 */
	any_only_cache = __swap_entries_free(si, entry, nr);

	/*
	 * Short-circuit the below loop if none of the entries had their
	 * reference dropped to zero.
	 */
	if (!any_only_cache)
		goto out;

	/*
	 * Now go back over the range trying to reclaim the swap cache. This is
	 * more efficient for large folios because we will only try to reclaim
	 * the swap once per folio in the common case. If we do
	 * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the
	 * latter will get a reference and lock the folio for every individual
	 * page but will only succeed once the swap slot for every subpage is
	 * zero.
	 */
	for (offset = start_offset; offset < end_offset; offset += nr) {
		nr = 1;
		if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
			/*
			 * Folios are always naturally aligned in swap so
			 * advance forward to the next boundary. Zero means no
			 * folio was found for the swap entry, so advance by 1
			 * in this case. Negative value means folio was found
			 * but could not be reclaimed. Here we can still advance
			 * to the next boundary (e.g. a 16-page folio spanning
			 * [16, 32) hit at offset 19 yields
			 * ALIGN(20, 16) - 19 = 13, landing the scan exactly on
			 * the folio's end boundary).
			 */
			nr = __try_to_reclaim_swap(si, offset,
						   TTRS_UNMAPPED | TTRS_FULL);
			if (nr == 0)
				nr = 1;
			else if (nr < 0)
				nr = -nr;
			nr = ALIGN(offset + 1, nr) - offset;
		}
	}

out:
	put_swap_device(si);
}

#ifdef CONFIG_HIBERNATION

swp_entry_t get_swap_page_of_type(int type)
{
	struct swap_info_struct *si = swap_type_to_swap_info(type);
	swp_entry_t entry = {0};

	if (!si)
		goto fail;

	/* This is called for allocating swap entry, not cache */
	if (get_swap_device_info(si)) {
		if ((si->flags & SWP_WRITEOK) &&
		    scan_swap_map_slots(si, 1, 1, &entry, 0))
			atomic_long_dec(&nr_swap_pages);
		put_swap_device(si);
	}
fail:
	return entry;
}

/*
 * Find the swap type that corresponds to given device (if any).
 *
 * @offset - number of the PAGE_SIZE-sized block of the device, starting
 * from 0, in which the swap header is expected to be located.
 *
 * This is needed for the suspend to disk (aka swsusp).
 */
int swap_type_of(dev_t device, sector_t offset)
{
	int type;

	if (!device)
		return -1;

	spin_lock(&swap_lock);
	for (type = 0; type < nr_swapfiles; type++) {
		struct swap_info_struct *sis = swap_info[type];

		if (!(sis->flags & SWP_WRITEOK))
			continue;

		if (device == sis->bdev->bd_dev) {
			struct swap_extent *se = first_se(sis);

			if (se->start_block == offset) {
				spin_unlock(&swap_lock);
				return type;
			}
		}
	}
	spin_unlock(&swap_lock);
	return -ENODEV;
}

int find_first_swap(dev_t *device)
{
	int type;

	spin_lock(&swap_lock);
	for (type = 0; type < nr_swapfiles; type++) {
		struct swap_info_struct *sis = swap_info[type];

		if (!(sis->flags & SWP_WRITEOK))
			continue;
		*device = sis->bdev->bd_dev;
		spin_unlock(&swap_lock);
		return type;
	}
	spin_unlock(&swap_lock);
	return -ENODEV;
}
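/*
 * Example usage of the two lookup helpers above (a hypothetical caller, not
 * part of this file; the real consumer is the hibernation resume path):
 */
static inline int example_find_resume_swap(dev_t resume_dev, sector_t off)
{
	dev_t dev;

	/* A configured resume device is matched by its header block... */
	if (resume_dev)
		return swap_type_of(resume_dev, off);
	/* ...otherwise fall back to the first writable swap device. */
	return find_first_swap(&dev);
}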
/*
 * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
 * corresponding to given index in swap_info (swap type).
 */
sector_t swapdev_block(int type, pgoff_t offset)
{
	struct swap_info_struct *si = swap_type_to_swap_info(type);
	struct swap_extent *se;

	if (!si || !(si->flags & SWP_WRITEOK))
		return 0;
	se = offset_to_swap_extent(si, offset);
	return se->start_block + (offset - se->start_page);
}

/*
 * Return either the total number of swap pages of given type, or the number
 * of free pages of that type (depending on @free)
 *
 * This is needed for software suspend
 */
unsigned int count_swap_pages(int type, int free)
{
	unsigned int n = 0;

	spin_lock(&swap_lock);
	if ((unsigned int)type < nr_swapfiles) {
		struct swap_info_struct *sis = swap_info[type];

		spin_lock(&sis->lock);
		if (sis->flags & SWP_WRITEOK) {
			n = sis->pages;
			if (free)
				n -= swap_usage_in_pages(sis);
		}
		spin_unlock(&sis->lock);
	}
	spin_unlock(&swap_lock);
	return n;
}
#endif /* CONFIG_HIBERNATION */

static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
{
	return pte_same(pte_swp_clear_flags(pte), swp_pte);
}

/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		     unsigned long addr, swp_entry_t entry, struct folio *folio)
{
	struct page *page;
	struct folio *swapcache;
	spinlock_t *ptl;
	pte_t *pte, new_pte, old_pte;
	bool hwpoisoned = false;
	int ret = 1;

	swapcache = folio;
	folio = ksm_might_need_to_copy(folio, vma, addr);
	if (unlikely(!folio))
		return -ENOMEM;
	else if (unlikely(folio == ERR_PTR(-EHWPOISON))) {
		hwpoisoned = true;
		folio = swapcache;
	}

	page = folio_file_page(folio, swp_offset(entry));
	if (PageHWPoison(page))
		hwpoisoned = true;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
					      swp_entry_to_pte(entry)))) {
		ret = 0;
		goto out;
	}

	old_pte = ptep_get(pte);

	if (unlikely(hwpoisoned || !folio_test_uptodate(folio))) {
		swp_entry_t swp_entry;

		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
		if (hwpoisoned) {
			swp_entry = make_hwpoison_entry(page);
		} else {
			swp_entry = make_poisoned_swp_entry();
		}
		new_pte = swp_entry_to_pte(swp_entry);
		ret = 0;
		goto setpte;
	}

	/*
	 * Some architectures may have to restore extra metadata to the page
	 * when reading from swap. This metadata may be indexed by swap entry
	 * so this must be called before swap_free().
	 */
	arch_swap_restore(folio_swap(entry, folio), folio);

	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
	folio_get(folio);
	if (folio == swapcache) {
		rmap_t rmap_flags = RMAP_NONE;

		/*
		 * See do_swap_page(): writeback would be problematic.
		 * However, we do a folio_wait_writeback() just before this
		 * call and have the folio locked.
		 */
		VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio);
		if (pte_swp_exclusive(old_pte))
			rmap_flags |= RMAP_EXCLUSIVE;
		/*
		 * We currently only expect small !anon folios, which are either
		 * fully exclusive or fully shared. If we ever get large folios
		 * here, we have to be careful.
		 */
		if (!folio_test_anon(folio)) {
			VM_WARN_ON_ONCE(folio_test_large(folio));
			VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
			folio_add_new_anon_rmap(folio, vma, addr, rmap_flags);
		} else {
			folio_add_anon_rmap_pte(folio, page, vma, addr,
						rmap_flags);
		}
	} else { /* ksm created a completely new copy */
		folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
		folio_add_lru_vma(folio, vma);
	}
	new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
	if (pte_swp_soft_dirty(old_pte))
		new_pte = pte_mksoft_dirty(new_pte);
	if (pte_swp_uffd_wp(old_pte))
		new_pte = pte_mkuffd_wp(new_pte);
setpte:
	set_pte_at(vma->vm_mm, addr, pte, new_pte);
	swap_free(entry);
out:
	if (pte)
		pte_unmap_unlock(pte, ptl);
	if (folio != swapcache) {
		folio_unlock(folio);
		folio_put(folio);
	}
	return ret;
}

static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
			   unsigned long addr, unsigned long end,
			   unsigned int type)
{
	pte_t *pte = NULL;
	struct swap_info_struct *si;

	si = swap_info[type];
	do {
		struct folio *folio;
		unsigned long offset;
		unsigned char swp_count;
		swp_entry_t entry;
		int ret;
		pte_t ptent;

		/*
		 * pte is NULL on the first iteration and after dropping the
		 * map to fault a folio in; (re)map the page table in that
		 * case, otherwise just step to the next entry.
		 */
		if (!pte++) {
			pte = pte_offset_map(pmd, addr);
			if (!pte)
				break;
		}

		ptent = ptep_get_lockless(pte);

		if (!is_swap_pte(ptent))
			continue;

		entry = pte_to_swp_entry(ptent);
		if (swp_type(entry) != type)
			continue;

		offset = swp_offset(entry);
		pte_unmap(pte);
		pte = NULL;

		folio = swap_cache_get_folio(entry, vma, addr);
		if (!folio) {
			struct vm_fault vmf = {
				.vma = vma,
				.address = addr,
				.real_address = addr,
				.pmd = pmd,
			};

			folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
						 &vmf);
		}
		if (!folio) {
			swp_count = READ_ONCE(si->swap_map[offset]);
			if (swp_count == 0 || swp_count == SWAP_MAP_BAD)
				continue;
			return -ENOMEM;
		}

		folio_lock(folio);
		folio_wait_writeback(folio);
		ret = unuse_pte(vma, pmd, addr, entry, folio);
		if (ret < 0) {
			folio_unlock(folio);
			folio_put(folio);
			return ret;
		}

		folio_free_swap(folio);
		folio_unlock(folio);
		folio_put(folio);
	} while (addr += PAGE_SIZE, addr != end);

	if (pte)
		pte_unmap(pte);
	return 0;
}

static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
				  unsigned long addr, unsigned long end,
				  unsigned int type)
{
	pmd_t *pmd;
	unsigned long next;
	int ret;

	pmd = pmd_offset(pud, addr);
	do {
		cond_resched();
		next = pmd_addr_end(addr, end);
		ret = unuse_pte_range(vma, pmd, addr, next, type);
		if (ret)
			return ret;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
				  unsigned long addr, unsigned long end,
				  unsigned int type)
{
	pud_t *pud;
	unsigned long next;
	int ret;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		ret = unuse_pmd_range(vma, pud, addr, next, type);
		if (ret)
			return ret;
	} while (pud++, addr = next, addr != end);
	return 0;
}

static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
				  unsigned long addr, unsigned long end,
				  unsigned int type)
{
	p4d_t *p4d;
	unsigned long next;
	int ret;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d))
			continue;
		ret = unuse_pud_range(vma, p4d, addr, next, type);
		if (ret)
			return ret;
	} while (p4d++, addr = next, addr != end);
	return 0;
}

static int unuse_vma(struct vm_area_struct *vma, unsigned int type)
{
	pgd_t *pgd;
	unsigned long addr, end, next;
	int ret;

	addr = vma->vm_start;
	end = vma->vm_end;

	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret = unuse_p4d_range(vma, pgd, addr, next, type);
		if (ret)
			return ret;
	} while (pgd++, addr = next, addr != end);
	return 0;
}
static int unuse_mm(struct mm_struct *mm, unsigned int type)
{
	struct vm_area_struct *vma;
	int ret = 0;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		if (vma->anon_vma && !is_vm_hugetlb_page(vma)) {
			ret = unuse_vma(vma, type);
			if (ret)
				break;
		}

		cond_resched();
	}
	mmap_read_unlock(mm);
	return ret;
}

/*
 * Scan swap_map from current position to next entry still in use.
 * Return 0 if there are no inuse entries after prev till end of
 * the map.
 */
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
				       unsigned int prev)
{
	unsigned int i;
	unsigned char count;

	/*
	 * No need for swap_lock here: we're just looking
	 * for whether an entry is in use, not modifying it; false
	 * hits are okay, and sys_swapoff() has already prevented new
	 * allocations from this area (while holding swap_lock).
	 */
	for (i = prev + 1; i < si->max; i++) {
		count = READ_ONCE(si->swap_map[i]);
		if (count && swap_count(count) != SWAP_MAP_BAD)
			break;
		if ((i % LATENCY_LIMIT) == 0)
			cond_resched();
	}

	if (i == si->max)
		i = 0;

	return i;
}

static int try_to_unuse(unsigned int type)
{
	struct mm_struct *prev_mm;
	struct mm_struct *mm;
	struct list_head *p;
	int retval = 0;
	struct swap_info_struct *si = swap_info[type];
	struct folio *folio;
	swp_entry_t entry;
	unsigned int i;

	if (!swap_usage_in_pages(si))
		goto success;

retry:
	retval = shmem_unuse(type);
	if (retval)
		return retval;

	prev_mm = &init_mm;
	mmget(prev_mm);

	spin_lock(&mmlist_lock);
	p = &init_mm.mmlist;
	while (swap_usage_in_pages(si) && !signal_pending(current) &&
	       (p = p->next) != &init_mm.mmlist) {

		mm = list_entry(p, struct mm_struct, mmlist);
		if (!mmget_not_zero(mm))
			continue;
		spin_unlock(&mmlist_lock);
		mmput(prev_mm);
		prev_mm = mm;
		retval = unuse_mm(mm, type);
		if (retval) {
			mmput(prev_mm);
			return retval;
		}

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.
		 */
		cond_resched();
		spin_lock(&mmlist_lock);
	}
	spin_unlock(&mmlist_lock);

	mmput(prev_mm);

	i = 0;
	while (swap_usage_in_pages(si) && !signal_pending(current) &&
	       (i = find_next_to_unuse(si, i)) != 0) {

		entry = swp_entry(type, i);
		folio = filemap_get_folio(swap_address_space(entry),
					  swap_cache_index(entry));
		if (IS_ERR(folio))
			continue;

		/*
		 * It is conceivable that a racing task removed this folio from
		 * swap cache just before we acquired the page lock. The folio
		 * might even be back in swap cache on another swap area. But
		 * that is okay, folio_free_swap() only removes stale folios.
		 */
		folio_lock(folio);
		folio_wait_writeback(folio);
		folio_free_swap(folio);
		folio_unlock(folio);
		folio_put(folio);
	}

	/*
	 * Let's check again to see if there are still swap entries in the map.
	 * If yes, we need to retry the unuse logic.
	 * Under global memory pressure, swap entries can be reinserted back
	 * into process space after the mmlist loop above passes over them.
	 *
	 * Limit the number of retries? No: when mmget_not_zero()
	 * above fails, that mm is likely to be freeing swap from
	 * exit_mmap(), which proceeds at its own independent pace;
	 * and even shmem_writepage() could have been preempted after
	 * folio_alloc_swap(), temporarily hiding that swap. It's easy
	 * and robust (though cpu-intensive) just to keep retrying.
	 */
	if (swap_usage_in_pages(si)) {
		if (!signal_pending(current))
			goto retry;
		return -EINTR;
	}

success:
	/*
	 * Make sure that further cleanups after try_to_unuse() returns happen
	 * after swap_range_free() reduces si->inuse_pages to 0.
	 */
	smp_mb();
	return 0;
}
/*
 * After a successful try_to_unuse, if no swap is now in use, we know
 * we can empty the mmlist. swap_lock must be held on entry and exit.
 * Note that mmlist_lock nests inside swap_lock, and an mm must be
 * added to the mmlist just after page_duplicate - before would be racy.
 */
static void drain_mmlist(void)
{
	struct list_head *p, *next;
	unsigned int type;

	for (type = 0; type < nr_swapfiles; type++)
		if (swap_usage_in_pages(swap_info[type]))
			return;
	spin_lock(&mmlist_lock);
	list_for_each_safe(p, next, &init_mm.mmlist)
		list_del_init(p);
	spin_unlock(&mmlist_lock);
}

/*
 * Free all of a swapdev's extent information
 */
static void destroy_swap_extents(struct swap_info_struct *sis)
{
	while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) {
		struct rb_node *rb = sis->swap_extent_root.rb_node;
		struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node);

		rb_erase(rb, &sis->swap_extent_root);
		kfree(se);
	}

	if (sis->flags & SWP_ACTIVATED) {
		struct file *swap_file = sis->swap_file;
		struct address_space *mapping = swap_file->f_mapping;

		sis->flags &= ~SWP_ACTIVATED;
		if (mapping->a_ops->swap_deactivate)
			mapping->a_ops->swap_deactivate(swap_file);
	}
}

/*
 * Add a block range (and the corresponding page range) into this swapdev's
 * extent tree.
 *
 * This function rather assumes that it is called in ascending page order.
 */
int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
		    unsigned long nr_pages, sector_t start_block)
{
	struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL;
	struct swap_extent *se;
	struct swap_extent *new_se;

	/*
	 * place the new node at the rightmost, since the
	 * function is called in ascending page order.
	 */
	while (*link) {
		parent = *link;
		link = &parent->rb_right;
	}

	if (parent) {
		se = rb_entry(parent, struct swap_extent, rb_node);
		BUG_ON(se->start_page + se->nr_pages != start_page);
		if (se->start_block + se->nr_pages == start_block) {
			/* Merge it */
			se->nr_pages += nr_pages;
			return 0;
		}
	}

	/* No merge, insert a new extent. */
	new_se = kmalloc(sizeof(*se), GFP_KERNEL);
	if (new_se == NULL)
		return -ENOMEM;
	new_se->start_page = start_page;
	new_se->nr_pages = nr_pages;
	new_se->start_block = start_block;

	rb_link_node(&new_se->rb_node, parent, link);
	rb_insert_color(&new_se->rb_node, &sis->swap_extent_root);
	return 1;
}
EXPORT_SYMBOL_GPL(add_swap_extent);
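/*
 * Worked example for add_swap_extent() above (illustrative numbers): after
 * inserting (start_page=0, nr_pages=16, start_block=100), a following call
 * with start_page=16, start_block=116 satisfies se->start_block +
 * se->nr_pages == start_block and simply grows the extent to nr_pages=32;
 * start_block=200 instead would allocate a second rb-tree node.
 */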
/*
 * A `swap extent' is a simple thing which maps a contiguous range of pages
 * onto a contiguous range of disk blocks. A rbtree of swap extents is
 * built at swapon time and is then used at swap_writepage/swap_read_folio
 * time for locating where on disk a page belongs.
 *
 * If the swapfile is an S_ISBLK block device, a single extent is installed.
 * This is done so that the main operating code can treat S_ISBLK and S_ISREG
 * swap files identically.
 *
 * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap
 * extent rbtree operates in PAGE_SIZE disk blocks. Both S_ISREG and S_ISBLK
 * swapfiles are handled *identically* after swapon time.
 *
 * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks
 * and will parse them into a rbtree, in PAGE_SIZE chunks. If some stray
 * blocks are found which do not fall within the PAGE_SIZE alignment
 * requirements, they are simply tossed out - we will never use those blocks
 * for swapping.
 *
 * For all swap devices we set S_SWAPFILE across the life of the swapon. This
 * prevents users from writing to the swap device, which will corrupt memory.
 *
 * The amount of disk space which a single swap extent represents varies.
 * Typically it is in the 1-4 megabyte range. So we can have hundreds of
 * extents in the rbtree. - akpm.
 */
static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
{
	struct file *swap_file = sis->swap_file;
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	int ret;

	if (S_ISBLK(inode->i_mode)) {
		ret = add_swap_extent(sis, 0, sis->max, 0);
		*span = sis->pages;
		return ret;
	}

	if (mapping->a_ops->swap_activate) {
		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
		if (ret < 0)
			return ret;
		sis->flags |= SWP_ACTIVATED;
		if ((sis->flags & SWP_FS_OPS) &&
		    sio_pool_init() != 0) {
			destroy_swap_extents(sis);
			return -ENOMEM;
		}
		return ret;
	}

	return generic_swapfile_activate(sis, swap_file, span);
}

static int swap_node(struct swap_info_struct *si)
{
	struct block_device *bdev;

	if (si->bdev)
		bdev = si->bdev;
	else
		bdev = si->swap_file->f_inode->i_sb->s_bdev;

	return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE;
}

static void setup_swap_info(struct swap_info_struct *si, int prio,
			    unsigned char *swap_map,
			    struct swap_cluster_info *cluster_info,
			    unsigned long *zeromap)
{
	int i;

	if (prio >= 0)
		si->prio = prio;
	else
		si->prio = --least_priority;
	/*
	 * the plist prio is negated because plist ordering is
	 * low-to-high, while swap ordering is high-to-low
	 */
	si->list.prio = -si->prio;
	for_each_node(i) {
		if (si->prio >= 0)
			si->avail_lists[i].prio = -si->prio;
		else {
			if (swap_node(si) == i)
				si->avail_lists[i].prio = 1;
			else
				si->avail_lists[i].prio = -si->prio;
		}
	}
	si->swap_map = swap_map;
	si->cluster_info = cluster_info;
	si->zeromap = zeromap;
}

static void _enable_swap_info(struct swap_info_struct *si)
{
	atomic_long_add(si->pages, &nr_swap_pages);
	total_swap_pages += si->pages;

	assert_spin_locked(&swap_lock);
	/*
	 * both lists are plists, and thus priority ordered.
	 * swap_active_head needs to be priority ordered for swapoff(),
	 * which on removal of any swap_info_struct with an auto-assigned
	 * (i.e. negative) priority increments the auto-assigned priority
	 * of any lower-priority swap_info_structs.
	 * swap_avail_head needs to be priority ordered for folio_alloc_swap(),
	 * which allocates swap pages from the highest available priority
	 * swap_info_struct.
	 */
	plist_add(&si->list, &swap_active_head);

	/* Add back to available list */
	add_to_avail_list(si, true);
}

static void enable_swap_info(struct swap_info_struct *si, int prio,
			     unsigned char *swap_map,
			     struct swap_cluster_info *cluster_info,
			     unsigned long *zeromap)
{
	spin_lock(&swap_lock);
	spin_lock(&si->lock);
	setup_swap_info(si, prio, swap_map, cluster_info, zeromap);
	spin_unlock(&si->lock);
	spin_unlock(&swap_lock);
	/*
	 * Finished initializing swap device, now it's safe to reference it.
	 */
	percpu_ref_resurrect(&si->users);
	spin_lock(&swap_lock);
	spin_lock(&si->lock);
	_enable_swap_info(si);
	spin_unlock(&si->lock);
	spin_unlock(&swap_lock);
}

static void reinsert_swap_info(struct swap_info_struct *si)
{
	spin_lock(&swap_lock);
	spin_lock(&si->lock);
	setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap);
	_enable_swap_info(si);
	spin_unlock(&si->lock);
	spin_unlock(&swap_lock);
}

static bool __has_usable_swap(void)
{
	return !plist_head_empty(&swap_active_head);
}

bool has_usable_swap(void)
{
	bool ret;

	spin_lock(&swap_lock);
	ret = __has_usable_swap();
	spin_unlock(&swap_lock);
	return ret;
}
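/*
 * Worked example for setup_swap_info() above (illustrative numbers): a
 * device added with an explicit prio of 5 gets list.prio == -5, while two
 * devices added without SWAP_FLAG_PREFER get prio -2 and -3, i.e. list.prio
 * 2 and 3. Since plists sort low-to-high, the negation makes allocation
 * visit higher-priority devices first; the avail_lists[nid] prio of 1 marks
 * an auto-prioritized device as preferred on its own NUMA node.
 */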
/*
 * Called after clearing SWP_WRITEOK, ensures cluster_alloc_range
 * sees the updated flags, so there will be no more allocations.
 */
static void wait_for_allocation(struct swap_info_struct *si)
{
	unsigned long offset;
	unsigned long end = ALIGN(si->max, SWAPFILE_CLUSTER);
	struct swap_cluster_info *ci;

	BUG_ON(si->flags & SWP_WRITEOK);

	for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) {
		ci = lock_cluster(si, offset);
		unlock_cluster(ci);
	}
}

SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
	struct swap_info_struct *p = NULL;
	unsigned char *swap_map;
	unsigned long *zeromap;
	struct swap_cluster_info *cluster_info;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	struct filename *pathname;
	int err, found = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	BUG_ON(!current->mm);

	pathname = getname(specialfile);
	if (IS_ERR(pathname))
		return PTR_ERR(pathname);

	victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	mapping = victim->f_mapping;
	spin_lock(&swap_lock);
	plist_for_each_entry(p, &swap_active_head, list) {
		if (p->flags & SWP_WRITEOK) {
			if (p->swap_file->f_mapping == mapping) {
				found = 1;
				break;
			}
		}
	}
	if (!found) {
		err = -EINVAL;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	if (!security_vm_enough_memory_mm(current->mm, p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	spin_lock(&p->lock);
	del_from_avail_list(p, true);
	if (p->prio < 0) {
		struct swap_info_struct *si = p;
		int nid;

		plist_for_each_entry_continue(si, &swap_active_head, list) {
			si->prio++;
			si->list.prio--;
			for_each_node(nid) {
				if (si->avail_lists[nid].prio != 1)
					si->avail_lists[nid].prio--;
			}
		}
		least_priority++;
	}
	plist_del(&p->list, &swap_active_head);
	atomic_long_sub(p->pages, &nr_swap_pages);
	total_swap_pages -= p->pages;
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);

	wait_for_allocation(p);

	disable_swap_slots_cache_lock();

	set_current_oom_origin();
	err = try_to_unuse(p->type);
	clear_current_oom_origin();

	if (err) {
		/* re-insert swap space back into swap_list */
		reinsert_swap_info(p);
		reenable_swap_slots_cache_unlock();
		goto out_dput;
	}

	reenable_swap_slots_cache_unlock();

	/*
	 * Wait for swap operations protected by get/put_swap_device()
	 * to complete. Because of synchronize_rcu() here, all swap
	 * operations protected by RCU reader side lock (including any
	 * spinlock) will be waited too. This makes it easy to
	 * prevent folio_test_swapcache() and the following swap cache
	 * operations from racing with swapoff.
	 */
	percpu_ref_kill(&p->users);
	synchronize_rcu();
	wait_for_completion(&p->comp);

	flush_work(&p->discard_work);
	flush_work(&p->reclaim_work);

	destroy_swap_extents(p);
	if (p->flags & SWP_CONTINUED)
		free_swap_count_continuations(p);

	if (!p->bdev || !bdev_nonrot(p->bdev))
		atomic_dec(&nr_rotate_swap);

	mutex_lock(&swapon_mutex);
	spin_lock(&swap_lock);
	spin_lock(&p->lock);
	drain_mmlist();

	swap_file = p->swap_file;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	zeromap = p->zeromap;
	p->zeromap = NULL;
	cluster_info = p->cluster_info;
	p->cluster_info = NULL;
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);
	arch_swap_invalidate_area(p->type);
	zswap_swapoff(p->type);
	mutex_unlock(&swapon_mutex);
	free_percpu(p->percpu_cluster);
	p->percpu_cluster = NULL;
	kfree(p->global_cluster);
	p->global_cluster = NULL;
	vfree(swap_map);
	kvfree(zeromap);
	kvfree(cluster_info);
	/* Destroy swap account information */
	swap_cgroup_swapoff(p->type);
	exit_swap_address_space(p->type);

	inode = mapping->host;

	inode_lock(inode);
	inode->i_flags &= ~S_SWAPFILE;
	inode_unlock(inode);
	filp_close(swap_file, NULL);

	/*
	 * Clear the SWP_USED flag after all resources are freed so that swapon
	 * can reuse this swap_info in alloc_swap_info() safely. It is ok to
	 * not hold p->lock after we cleared its SWP_WRITEOK.
	 */
	spin_lock(&swap_lock);
	p->flags = 0;
	spin_unlock(&swap_lock);

	err = 0;
	atomic_inc(&proc_poll_event);
	wake_up_interruptible(&proc_poll_wait);

out_dput:
	filp_close(victim, NULL);
out:
	putname(pathname);
	return err;
}

#ifdef CONFIG_PROC_FS
static __poll_t swaps_poll(struct file *file, poll_table *wait)
{
	struct seq_file *seq = file->private_data;

	poll_wait(file, &proc_poll_wait, wait);

	if (seq->poll_event != atomic_read(&proc_poll_event)) {
		seq->poll_event = atomic_read(&proc_poll_event);
		return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI;
	}

	return EPOLLIN | EPOLLRDNORM;
}

/* iterator */
static void *swap_start(struct seq_file *swap, loff_t *pos)
{
	struct swap_info_struct *si;
	int type;
	loff_t l = *pos;

	mutex_lock(&swapon_mutex);

	if (!l)
		return SEQ_START_TOKEN;

	for (type = 0; (si = swap_type_to_swap_info(type)); type++) {
		if (!(si->flags & SWP_USED) || !si->swap_map)
			continue;
		if (!--l)
			return si;
	}

	return NULL;
}

static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
{
	struct swap_info_struct *si = v;
	int type;

	if (v == SEQ_START_TOKEN)
		type = 0;
	else
		type = si->type + 1;

	++(*pos);
	for (; (si = swap_type_to_swap_info(type)); type++) {
		if (!(si->flags & SWP_USED) || !si->swap_map)
			continue;
		return si;
	}

	return NULL;
}

static void swap_stop(struct seq_file *swap, void *v)
{
	mutex_unlock(&swapon_mutex);
}

static int swap_show(struct seq_file *swap, void *v)
{
	struct swap_info_struct *si = v;
	struct file *file;
	int len;
	unsigned long bytes, inuse;

	if (si == SEQ_START_TOKEN) {
		seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
		return 0;
	}

	bytes = K(si->pages);
	inuse = K(swap_usage_in_pages(si));

	file = si->swap_file;
	len = seq_file_path(swap, file, " \t\n\\");
	seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n",
		   len < 40 ? 40 - len : 1, " ",
		   S_ISBLK(file_inode(file)->i_mode) ?
			"partition" : "file\t",
		   bytes, bytes < 10000000 ? "\t" : "",
		   inuse, inuse < 10000000 ? "\t" : "",
		   si->prio);
"\t" : "", si->prio); return 0; } static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, .show = swap_show }; static int swaps_open(struct inode *inode, struct file *file) { struct seq_file *seq; int ret; ret = seq_open(file, &swaps_op); if (ret) return ret; seq = file->private_data; seq->poll_event = atomic_read(&proc_poll_event); return 0; } static const struct proc_ops swaps_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_open = swaps_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, .proc_poll = swaps_poll, }; static int __init procswaps_init(void) { proc_create("swaps", 0, NULL, &swaps_proc_ops); return 0; } __initcall(procswaps_init); #endif /* CONFIG_PROC_FS */ #ifdef MAX_SWAPFILES_CHECK static int __init max_swapfiles_check(void) { MAX_SWAPFILES_CHECK(); return 0; } late_initcall(max_swapfiles_check); #endif static struct swap_info_struct *alloc_swap_info(void) { struct swap_info_struct *p; struct swap_info_struct *defer = NULL; unsigned int type; int i; p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); if (percpu_ref_init(&p->users, swap_users_ref_free, PERCPU_REF_INIT_DEAD, GFP_KERNEL)) { kvfree(p); return ERR_PTR(-ENOMEM); } spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { if (!(swap_info[type]->flags & SWP_USED)) break; } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); percpu_ref_exit(&p->users); kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { p->type = type; /* * Publish the swap_info_struct after initializing it. * Note that kvzalloc() above zeroes all its fields. */ smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */ nr_swapfiles++; } else { defer = p; p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() * would be relying on p->type to remain valid. */ } p->swap_extent_root = RB_ROOT; plist_node_init(&p->list, 0); for_each_node(i) plist_node_init(&p->avail_lists[i], 0); p->flags = SWP_USED; spin_unlock(&swap_lock); if (defer) { percpu_ref_exit(&defer->users); kvfree(defer); } spin_lock_init(&p->lock); spin_lock_init(&p->cont_lock); atomic_long_set(&p->inuse_pages, SWAP_USAGE_OFFLIST_BIT); init_completion(&p->comp); return p; } static int claim_swapfile(struct swap_info_struct *si, struct inode *inode) { if (S_ISBLK(inode->i_mode)) { si->bdev = I_BDEV(inode); /* * Zoned block devices contain zones that have a sequential * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ if (bdev_is_zoned(si->bdev)) return -EINVAL; si->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { si->bdev = inode->i_sb->s_bdev; } return 0; } /* * Find out how many pages are allowed for a single swap device. There * are two limiting factors: * 1) the number of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte, as defined by the different * architectures. * * In order to find the largest possible bit mask, a swap entry with * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap offset is * extracted. * * This will mask all the bits from the initial ~0UL mask that can't * be encoded in either the swp_entry_t or the architecture definition * of a swap pte. 
/*
 * Find out how many pages are allowed for a single swap device. There
 * are two limiting factors:
 * 1) the number of bits for the swap offset in the swp_entry_t type, and
 * 2) the number of bits in the swap pte, as defined by the different
 *    architectures.
 *
 * In order to find the largest possible bit mask, a swap entry with
 * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
 * decoded to a swp_entry_t again, and finally the swap offset is
 * extracted.
 *
 * This will mask all the bits from the initial ~0UL mask that can't
 * be encoded in either the swp_entry_t or the architecture definition
 * of a swap pte.
 */
unsigned long generic_max_swapfile_size(void)
{
	return swp_offset(pte_to_swp_entry(
			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
}

/* Can be overridden by an architecture for additional checks. */
__weak unsigned long arch_max_swapfile_size(void)
{
	return generic_max_swapfile_size();
}

static unsigned long read_swap_header(struct swap_info_struct *si,
				      union swap_header *swap_header,
				      struct inode *inode)
{
	int i;
	unsigned long maxpages;
	unsigned long swapfilepages;
	unsigned long last_page;

	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
		pr_err("Unable to find swap-space signature\n");
		return 0;
	}

	/* swap partition endianness hack... */
	if (swab32(swap_header->info.version) == 1) {
		swab32s(&swap_header->info.version);
		swab32s(&swap_header->info.last_page);
		swab32s(&swap_header->info.nr_badpages);
		if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
			return 0;
		for (i = 0; i < swap_header->info.nr_badpages; i++)
			swab32s(&swap_header->info.badpages[i]);
	}
	/* Check the swap header's sub-version */
	if (swap_header->info.version != 1) {
		pr_warn("Unable to handle swap header version %d\n",
			swap_header->info.version);
		return 0;
	}

	maxpages = swapfile_maximum_size;
	last_page = swap_header->info.last_page;
	if (!last_page) {
		pr_warn("Empty swap-file\n");
		return 0;
	}
	if (last_page > maxpages) {
		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
			K(maxpages), K(last_page));
	}
	if (maxpages > last_page) {
		maxpages = last_page + 1;
		/* p->max is an unsigned int: don't overflow it */
		if ((unsigned int)maxpages == 0)
			maxpages = UINT_MAX;
	}

	if (!maxpages)
		return 0;
	swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
	if (swapfilepages && maxpages > swapfilepages) {
		pr_warn("Swap area shorter than signature indicates\n");
		return 0;
	}
	if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
		return 0;
	if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
		return 0;

	return maxpages;
}

#define SWAP_CLUSTER_INFO_COLS						\
	DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
#define SWAP_CLUSTER_SPACE_COLS						\
	DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
#define SWAP_CLUSTER_COLS						\
	max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)

static int setup_swap_map_and_extents(struct swap_info_struct *si,
				      union swap_header *swap_header,
				      unsigned char *swap_map,
				      unsigned long maxpages,
				      sector_t *span)
{
	unsigned int nr_good_pages;
	unsigned long i;
	int nr_extents;

	nr_good_pages = maxpages - 1;	/* omit header page */

	for (i = 0; i < swap_header->info.nr_badpages; i++) {
		unsigned int page_nr = swap_header->info.badpages[i];

		if (page_nr == 0 || page_nr > swap_header->info.last_page)
			return -EINVAL;
		if (page_nr < maxpages) {
			swap_map[page_nr] = SWAP_MAP_BAD;
			nr_good_pages--;
		}
	}

	if (nr_good_pages) {
		swap_map[0] = SWAP_MAP_BAD;
		si->max = maxpages;
		si->pages = nr_good_pages;
		nr_extents = setup_swap_extents(si, span);
		if (nr_extents < 0)
			return nr_extents;
		nr_good_pages = si->pages;
	}
	if (!nr_good_pages) {
		pr_warn("Empty swap-file\n");
		return -EINVAL;
	}

	return nr_extents;
}

static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
						union swap_header *swap_header,
						unsigned long maxpages)
{
	unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
	struct swap_cluster_info *cluster_info;
	unsigned long i, j, k, idx;
	int cpu, err = -ENOMEM;

	cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
	if (!cluster_info)
		goto err;

	for (i = 0; i < nr_clusters; i++)
		spin_lock_init(&cluster_info[i].lock);
	if (si->flags & SWP_SOLIDSTATE) {
		si->percpu_cluster = alloc_percpu(struct percpu_cluster);
		if (!si->percpu_cluster)
			goto err_free;

		for_each_possible_cpu(cpu) {
			struct percpu_cluster *cluster;

			cluster = per_cpu_ptr(si->percpu_cluster, cpu);
			for (i = 0; i < SWAP_NR_ORDERS; i++)
				cluster->next[i] = SWAP_ENTRY_INVALID;
			local_lock_init(&cluster->lock);
		}
	} else {
		si->global_cluster = kmalloc(sizeof(*si->global_cluster),
					     GFP_KERNEL);
		if (!si->global_cluster)
			goto err_free;
		for (i = 0; i < SWAP_NR_ORDERS; i++)
			si->global_cluster->next[i] = SWAP_ENTRY_INVALID;
		spin_lock_init(&si->global_cluster_lock);
	}

	/*
	 * Mark unusable pages as unavailable. The clusters aren't
	 * marked free yet, so no list operations are involved yet.
	 *
	 * See setup_swap_map_and_extents(): header page, bad pages,
	 * and the EOF part of the last cluster.
	 */
	inc_cluster_info_page(si, cluster_info, 0);
	for (i = 0; i < swap_header->info.nr_badpages; i++)
		inc_cluster_info_page(si, cluster_info,
				      swap_header->info.badpages[i]);
	for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
		inc_cluster_info_page(si, cluster_info, i);

	INIT_LIST_HEAD(&si->free_clusters);
	INIT_LIST_HEAD(&si->full_clusters);
	INIT_LIST_HEAD(&si->discard_clusters);

	for (i = 0; i < SWAP_NR_ORDERS; i++) {
		INIT_LIST_HEAD(&si->nonfull_clusters[i]);
		INIT_LIST_HEAD(&si->frag_clusters[i]);
		atomic_long_set(&si->frag_cluster_nr[i], 0);
	}

	/*
	 * Interleave the clusters across cache-line-sized columns when
	 * putting them on the lists, to reduce false cache line sharing
	 * between cluster_info entries sharing the same address space.
	 */
	for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
		j = k % SWAP_CLUSTER_COLS;
		for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
			struct swap_cluster_info *ci;

			idx = i * SWAP_CLUSTER_COLS + j;
			ci = cluster_info + idx;
			if (idx >= nr_clusters)
				continue;
			if (ci->count) {
				ci->flags = CLUSTER_FLAG_NONFULL;
				list_add_tail(&ci->list, &si->nonfull_clusters[0]);
				continue;
			}
			ci->flags = CLUSTER_FLAG_FREE;
			list_add_tail(&ci->list, &si->free_clusters);
		}
	}

	return cluster_info;

err_free:
	kvfree(cluster_info);
err:
	return ERR_PTR(err);
}

SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
{
	struct swap_info_struct *si;
	struct filename *name;
	struct file *swap_file = NULL;
	struct address_space *mapping;
	struct dentry *dentry;
	int prio;
	int error;
	union swap_header *swap_header;
	int nr_extents;
	sector_t span;
	unsigned long maxpages;
	unsigned char *swap_map = NULL;
	unsigned long *zeromap = NULL;
	struct swap_cluster_info *cluster_info = NULL;
	struct folio *folio = NULL;
	struct inode *inode = NULL;
	bool inced_nr_rotate_swap = false;

	if (swap_flags & ~SWAP_FLAGS_VALID)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!swap_avail_heads)
		return -ENOMEM;

	si = alloc_swap_info();
	if (IS_ERR(si))
		return PTR_ERR(si);

	INIT_WORK(&si->discard_work, swap_discard_work);
	INIT_WORK(&si->reclaim_work, swap_reclaim_work);

	name = getname(specialfile);
	if (IS_ERR(name)) {
		error = PTR_ERR(name);
		name = NULL;
		goto bad_swap;
	}
	swap_file = file_open_name(name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
	if (IS_ERR(swap_file)) {
		error = PTR_ERR(swap_file);
		swap_file = NULL;
		goto bad_swap;
	}

	si->swap_file = swap_file;
	mapping = swap_file->f_mapping;
	dentry = swap_file->f_path.dentry;
	inode = mapping->host;

	error = claim_swapfile(si, inode);
	if (unlikely(error))
		goto bad_swap;

	inode_lock(inode);
	if (d_unlinked(dentry) || cant_mount(dentry)) {
		error = -ENOENT;
		goto bad_swap_unlock_inode;
	}
	if (IS_SWAPFILE(inode)) {
		error = -EBUSY;
		goto bad_swap_unlock_inode;
	}

	/*
	 * Read the swap header.
	 */
	if (!mapping->a_ops->read_folio) {
		error = -EINVAL;
		goto bad_swap_unlock_inode;
	}
	folio = read_mapping_folio(mapping, 0, swap_file);
	if (IS_ERR(folio)) {
		error = PTR_ERR(folio);
		goto bad_swap_unlock_inode;
	}
	swap_header = kmap_local_folio(folio, 0);

	maxpages = read_swap_header(si, swap_header, inode);
	if (unlikely(!maxpages)) {
		error = -EINVAL;
		goto bad_swap_unlock_inode;
	}

	/* OK, set up the swap map and apply the bad block list */
	swap_map = vzalloc(maxpages);
	if (!swap_map) {
		error = -ENOMEM;
		goto bad_swap_unlock_inode;
	}

	error = swap_cgroup_swapon(si->type, maxpages);
	if (error)
		goto bad_swap_unlock_inode;

	nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map,
						maxpages, &span);
	if (unlikely(nr_extents < 0)) {
		error = nr_extents;
		goto bad_swap_unlock_inode;
	}

	/*
	 * Use kvmalloc_array instead of bitmap_zalloc as the allocation order
	 * might be above MAX_PAGE_ORDER in case of a large swap file.
	 */
	zeromap = kvmalloc_array(BITS_TO_LONGS(maxpages), sizeof(long),
				 GFP_KERNEL | __GFP_ZERO);
	if (!zeromap) {
		error = -ENOMEM;
		goto bad_swap_unlock_inode;
	}

	if (si->bdev && bdev_stable_writes(si->bdev))
		si->flags |= SWP_STABLE_WRITES;

	if (si->bdev && bdev_synchronous(si->bdev))
		si->flags |= SWP_SYNCHRONOUS_IO;

	if (si->bdev && bdev_nonrot(si->bdev)) {
		si->flags |= SWP_SOLIDSTATE;
	} else {
		atomic_inc(&nr_rotate_swap);
		inced_nr_rotate_swap = true;
	}

	cluster_info = setup_clusters(si, swap_header, maxpages);
	if (IS_ERR(cluster_info)) {
		error = PTR_ERR(cluster_info);
		cluster_info = NULL;
		goto bad_swap_unlock_inode;
	}

	if ((swap_flags & SWAP_FLAG_DISCARD) &&
	    si->bdev && bdev_max_discard_sectors(si->bdev)) {
		/*
		 * When discard is enabled for swap with no particular
		 * policy flagged, we set all swap discard flags here in
		 * order to sustain backward compatibility with older
		 * swapon(8) releases.
		 */
		si->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
			      SWP_PAGE_DISCARD);

		/*
		 * By flagging sys_swapon, a sysadmin can tell us to
		 * either do single-time area discards only, or to just
		 * perform discards for released swap page-clusters.
		 * Now it's time to adjust the p->flags accordingly.
		 */
		if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
			si->flags &= ~SWP_PAGE_DISCARD;
		else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
			si->flags &= ~SWP_AREA_DISCARD;

		/* issue a swapon-time discard if it's still required */
		if (si->flags & SWP_AREA_DISCARD) {
			int err = discard_swap(si);

			if (unlikely(err))
				pr_err("swapon: discard_swap(%p): %d\n",
					si, err);
		}
	}

	error = init_swap_address_space(si->type, maxpages);
	if (error)
		goto bad_swap_unlock_inode;

	error = zswap_swapon(si->type, maxpages);
	if (error)
		goto free_swap_address_space;

	/*
	 * Flush any pending IO and dirty mappings before we start using this
	 * swap device.
	 */
	inode->i_flags |= S_SWAPFILE;
	error = inode_drain_writes(inode);
	if (error) {
		inode->i_flags &= ~S_SWAPFILE;
		goto free_swap_zswap;
	}

	mutex_lock(&swapon_mutex);
	prio = -1;
	if (swap_flags & SWAP_FLAG_PREFER)
		prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
	enable_swap_info(si, prio, swap_map, cluster_info, zeromap);

	pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n",
		K(si->pages), name->name, si->prio, nr_extents,
		K((unsigned long long)span),
		(si->flags & SWP_SOLIDSTATE) ? "SS" : "",
		(si->flags & SWP_DISCARDABLE) ? "D" : "",
		(si->flags & SWP_AREA_DISCARD) ? "s" : "",
		(si->flags & SWP_PAGE_DISCARD) ? "c" : "");
"c" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); error = 0; goto out; free_swap_zswap: zswap_swapoff(si->type); free_swap_address_space: exit_swap_address_space(si->type); bad_swap_unlock_inode: inode_unlock(inode); bad_swap: free_percpu(si->percpu_cluster); si->percpu_cluster = NULL; kfree(si->global_cluster); si->global_cluster = NULL; inode = NULL; destroy_swap_extents(si); swap_cgroup_swapoff(si->type); spin_lock(&swap_lock); si->swap_file = NULL; si->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); kvfree(zeromap); kvfree(cluster_info); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); if (swap_file) filp_close(swap_file, NULL); out: if (!IS_ERR_OR_NULL(folio)) folio_release_kmap(folio, swap_header); if (name) putname(name); if (inode) inode_unlock(inode); if (!error) enable_swap_slots_cache(); return error; } void si_swapinfo(struct sysinfo *val) { unsigned int type; unsigned long nr_to_be_unused = 0; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *si = swap_info[type]; if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) nr_to_be_unused += swap_usage_in_pages(si); } val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); } /* * Verify that nr swap entries are valid and increment their swap map counts. * * Returns error code in following case. * - success -> 0 * - swp_entry is invalid -> EINVAL * - swp_entry is migration entry -> EINVAL * - swap-cache reference is requested but there is already one. -> EEXIST * - swap-cache reference is requested but the entry is not used. -> ENOENT * - swap-mapped reference requested but needs continued swap count. -> ENOMEM */ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr) { struct swap_info_struct *si; struct swap_cluster_info *ci; unsigned long offset; unsigned char count; unsigned char has_cache; int err, i; si = swp_swap_info(entry); offset = swp_offset(entry); VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); VM_WARN_ON(usage == 1 && nr > 1); ci = lock_cluster(si, offset); err = 0; for (i = 0; i < nr; i++) { count = si->swap_map[offset + i]; /* * swapin_readahead() doesn't check if a swap entry is valid, so the * swap entry could be SWAP_MAP_BAD. Check here with lock held. */ if (unlikely(swap_count(count) == SWAP_MAP_BAD)) { err = -ENOENT; goto unlock_out; } has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (!count && !has_cache) { err = -ENOENT; } else if (usage == SWAP_HAS_CACHE) { if (has_cache) err = -EEXIST; } else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) { err = -EINVAL; } if (err) goto unlock_out; } for (i = 0; i < nr; i++) { count = si->swap_map[offset + i]; has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (usage == SWAP_HAS_CACHE) has_cache = SWAP_HAS_CACHE; else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX) count += usage; else if (swap_count_continued(si, offset + i, count)) count = COUNT_CONTINUED; else { /* * Don't need to rollback changes, because if * usage == 1, there must be nr == 1. */ err = -ENOMEM; goto unlock_out; } WRITE_ONCE(si->swap_map[offset + i], count | has_cache); } unlock_out: unlock_cluster(ci); return err; } /* * Help swapoff by noting that swap entry belongs to shmem/tmpfs * (in which case its reference count is never incremented). 
*/ void swap_shmem_alloc(swp_entry_t entry, int nr) { __swap_duplicate(entry, SWAP_MAP_SHMEM, nr); } /* * Increase reference count of swap entry by 1. * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required * but could not be atomically allocated. Returns 0, just as if it succeeded, * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which * might occur if a page table entry has been corrupted. */ int swap_duplicate(swp_entry_t entry) { int err = 0; while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM) err = add_swap_count_continuation(entry, GFP_ATOMIC); return err; } /* * @entry: first swap entry from which we allocate nr swap cache. * * Called when allocating swap cache for existing swap entries. * This can return error codes; it returns 0 on success. * -EEXIST means there is already a swap cache. * Note: return code is different from swap_duplicate(). */ int swapcache_prepare(swp_entry_t entry, int nr) { return __swap_duplicate(entry, SWAP_HAS_CACHE, nr); } void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr) { unsigned long offset = swp_offset(entry); cluster_swap_free_nr(si, offset, nr, SWAP_HAS_CACHE); } struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); } /* * out-of-line methods to avoid include hell. */ struct address_space *swapcache_mapping(struct folio *folio) { return swp_swap_info(folio->swap)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __folio_swap_cache_index(struct folio *folio) { return swap_cache_index(folio->swap); } EXPORT_SYMBOL_GPL(__folio_swap_cache_index); /* * add_swap_count_continuation - called when a swap count is duplicated * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's * page of the original vmalloc'ed swap_map, to hold the continuation count * (for that entry and for its neighbouring PAGE_SIZE swap entries). Called * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc. * * These continuation pages are seldom referenced: the common paths all work * on the original swap_map, only referring to a continuation page when the * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX. * * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL) * can be called after dropping locks. */ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) { struct swap_info_struct *si; struct swap_cluster_info *ci; struct page *head; struct page *page; struct page *list_page; pgoff_t offset; unsigned char count; int ret = 0; /* * When debugging, it's easier to use __GFP_ZERO here; but it's better * for latency not to zero a page while GFP_ATOMIC and holding locks. */ page = alloc_page(gfp_mask | __GFP_HIGHMEM); si = get_swap_device(entry); if (!si) { /* * An acceptable race has occurred since the failing * __swap_duplicate(): the swap device may be undergoing swapoff */ goto outer; } offset = swp_offset(entry); ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) { /* * The higher the swap count, the more likely it is that tasks * will race to add swap count continuation: we need to avoid * over-provisioning. 
*/ goto out; } if (!page) { ret = -ENOMEM; goto out; } head = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; spin_lock(&si->cont_lock); /* * Page allocation does not initialize the page's lru field, * but it does always reset its private field. */ if (!page_private(head)) { BUG_ON(count & COUNT_CONTINUED); INIT_LIST_HEAD(&head->lru); set_page_private(head, SWP_CONTINUED); si->flags |= SWP_CONTINUED; } list_for_each_entry(list_page, &head->lru, lru) { unsigned char *map; /* * If the previous map said no continuation, but we've found * a continuation page, free our allocation and use this one. */ if (!(count & COUNT_CONTINUED)) goto out_unlock_cont; map = kmap_local_page(list_page) + offset; count = *map; kunmap_local(map); /* * If this continuation count now has some space in it, * free our allocation and use this one. */ if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) goto out_unlock_cont; } list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out_unlock_cont: spin_unlock(&si->cont_lock); out: unlock_cluster(ci); put_swap_device(si); outer: if (page) __free_page(page); return ret; } /* * swap_count_continued - when the original swap_map count is incremented * from SWAP_MAP_MAX, check if there is already a continuation page to carry * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster * lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) { struct page *head; struct page *page; unsigned char *map; bool ret; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head) != SWP_CONTINUED) { BUG_ON(count & COUNT_CONTINUED); return false; /* need to add count continuation */ } spin_lock(&si->cont_lock); offset &= ~PAGE_MASK; page = list_next_entry(head, lru); map = kmap_local_page(page) + offset; if (count == SWAP_MAP_MAX) /* initial increment from swap_map */ goto init_map; /* jump over SWAP_CONT_MAX checks */ if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */ /* * Think of how you add 1 to 999 */ while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } if (*map == SWAP_CONT_MAX) { kunmap_local(map); page = list_next_entry(page, lru); if (page == head) { ret = false; /* add count continuation */ goto out; } map = kmap_local_page(page) + offset; init_map: *map = 0; /* we didn't zero the page */ } *map += 1; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = COUNT_CONTINUED; kunmap_local(map); } ret = true; /* incremented */ } else { /* decrementing */ /* * Think of how you subtract 1 from 1000 */ BUG_ON(count != COUNT_CONTINUED); while (*map == COUNT_CONTINUED) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } BUG_ON(*map == 0); *map -= 1; if (*map == 0) count = 0; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = SWAP_CONT_MAX | count; count = COUNT_CONTINUED; kunmap_local(map); } ret = count == COUNT_CONTINUED; } out: spin_unlock(&si->cont_lock); return ret; } /* * free_swap_count_continuations - swapoff free 
all the continuation pages * appended to the swap_map, after swap_map is quiesced, before vfree'ing it. */ static void free_swap_count_continuations(struct swap_info_struct *si) { pgoff_t offset; for (offset = 0; offset < si->max; offset += PAGE_SIZE) { struct page *head; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head)) { struct page *page, *next; list_for_each_entry_safe(page, next, &head->lru, lru) { list_del(&page->lru); __free_page(page); } } } } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { struct swap_info_struct *si, *next; int nid = folio_nid(folio); if (!(gfp & __GFP_IO)) return; if (!__has_usable_swap()) return; if (!blk_cgroup_congested()) return; /* * We've already scheduled a throttle, avoid taking the global swap * lock. */ if (current->throttle_disk) return; spin_lock(&swap_avail_lock); plist_for_each_entry_safe(si, next, &swap_avail_heads[nid], avail_lists[nid]) { if (si->bdev) { blkcg_schedule_throttle(si->bdev->bd_disk, true); break; } } spin_unlock(&swap_avail_lock); } #endif static int __init swapfile_init(void) { int nid; swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head), GFP_KERNEL); if (!swap_avail_heads) { pr_emerg("Not enough memory for swap heads, swap is disabled\n"); return -ENOMEM; } for_each_node(nid) plist_head_init(&swap_avail_heads[nid]); swapfile_maximum_size = arch_max_swapfile_size(); #ifdef CONFIG_MIGRATION if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS)) swap_migration_ad_supported = true; #endif /* CONFIG_MIGRATION */ return 0; } subsys_initcall(swapfile_init);
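For reference, the continuation scheme above behaves like multi-digit arithmetic: the low "digit" of an entry's count lives in swap_map[], each continuation page holds one higher digit, and COUNT_CONTINUED flags that a higher digit is non-zero (the "add 1 to 999" / "subtract 1 from 1000" comments in swap_count_continued()). The following standalone user-space sketch models only that carry/borrow behaviour; the digit widths, the decode() rule, and all names here are illustrative assumptions, not the kernel's actual layout.

/* Sketch: multi-digit counting in the style of swap_count_continued(). */
#include <stdio.h>
#include <assert.h>

#define CONTINUED 0x80	/* stand-in for COUNT_CONTINUED */
#define LOW_MAX   0x3f	/* stand-in for SWAP_MAP_MAX (digit in swap_map) */
#define CONT_MAX  0x7f	/* stand-in for SWAP_CONT_MAX (continuation digit) */
#define NDIGIT    4	/* low digit plus three modelled continuation pages */

static unsigned char d[NDIGIT];	/* d[0] ~ swap_map byte, d[1..] ~ continuations */

static unsigned long decode(void)	/* illustrative place-value rule */
{
	unsigned long unit = 1, total = d[0] & ~CONTINUED;
	int i;

	for (i = 1; i < NDIGIT; i++) {
		total += (unsigned long)(d[i] & ~CONTINUED) * unit;
		unit *= CONT_MAX + 1;
	}
	return total;
}

static void inc(void)
{
	int i;

	if (!(d[0] & CONTINUED) && (d[0] & ~CONTINUED) < LOW_MAX) {
		d[0]++;
		return;
	}
	d[0] = LOW_MAX | CONTINUED;	/* low digit pins at its max once continued */
	for (i = 1; i < NDIGIT; i++) {
		if ((d[i] & ~CONTINUED) < CONT_MAX) {
			d[i]++;		/* carry absorbed by this digit */
			return;
		}
		d[i] = CONTINUED;	/* 9 -> 0 with carry, as in 999 + 1 */
	}
	assert(0 && "out of modelled continuation digits");
}

static void dec(void)
{
	int i, j, higher;

	if (!(d[0] & CONTINUED)) {
		assert(d[0] > 0);	/* underflow would be a refcount bug */
		d[0]--;
		return;
	}
	i = 1;
	while (i < NDIGIT && (d[i] & ~CONTINUED) == 0)
		i++;			/* zero digits pass the borrow upward */
	assert(i < NDIGIT);
	d[i]--;				/* borrow from the lowest non-zero digit */
	higher = (d[i] & ~CONTINUED) || (d[i] & CONTINUED);
	for (j = i - 1; j >= 1; j--) {	/* 000 -> 999, as in 1000 - 1 */
		d[j] = CONT_MAX | (higher ? CONTINUED : 0);
		higher = 1;
	}
	if (!higher)
		d[0] = LOW_MAX;		/* continuations empty: unpin the low digit */
}

int main(void)
{
	unsigned long n;

	for (n = 0; n < 300; n++) {	/* 300 crosses into two continuation digits */
		assert(decode() == n);
		inc();
	}
	printf("count 300 -> digits %02x %02x %02x %02x\n", d[0], d[1], d[2], d[3]);
	for (n = 300; n > 0; n--) {
		dec();
		assert(decode() == n - 1);
	}
	printf("back to zero, low digit %02x\n", d[0]);
	return 0;
}

As in the kernel, the common case (count below the low digit's maximum) never touches a continuation digit; the higher digits only come into play when the low digit saturates.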
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> * Martin Josefsson <gandalf@wlug.westbo.se> */ /* Kernel module implementing an IP set type: the bitmap:ip,mac type */ #include <linux/module.h> #include <linux/ip.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/if_ether.h> #include <linux/netlink.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <net/netlink.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Counter support added */ /* 2 Comment support added */ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac #define HOST_MASK 32 #define IP_SET_BITMAP_STORED_TIMEOUT enum { MAC_UNSET, /* element is set, without MAC */ MAC_FILLED, /* element is set with MAC */ }; /* Type structure */ struct bitmap_ipmac { unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[] /* MAC + data extensions */ __aligned(__alignof__(u64)); }; /* ADT structure for generic function args */ struct bitmap_ipmac_adt_elem { unsigned char ether[ETH_ALEN] __aligned(2); u16 id; u16 add_mac; }; struct bitmap_ipmac_elem { unsigned char ether[ETH_ALEN]; unsigned char filled; } __aligned(__alignof__(u64)); static u32 ip_to_id(const struct 
bitmap_ipmac *m, u32 ip) { return ip - m->first_ip; } #define get_elem(extensions, id, dsize) \ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize)) #define get_const_elem(extensions, id, dsize) \ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize)) /* Common functions */ static int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; elem = get_const_elem(map->extensions, e->id, dsize); if (e->add_mac && elem->filled == MAC_FILLED) return ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; } static int bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; elem = get_const_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } static int bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) { return elem->filled == MAC_FILLED; } static int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); } else { /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, * possibly by the kernel */ if (e->add_mac) ip_set_timeout_set(timeout, t); else *timeout = t; } return 0; } static int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; elem = get_elem(map->extensions, e->id, dsize); if (test_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->add_mac && (flags & IPSET_FLAG_EXIST) && !ether_addr_equal(e->ether, elem->ether)) { /* memcpy isn't atomic */ clear_bit(e->id, map->members); smp_mb__after_atomic(); ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; } else if (!e->add_mac) /* Already added without ethernet address */ return IPSET_ADD_FAILED; /* Fill the MAC address and trigger the timer activation */ clear_bit(e->id, map->members); smp_mb__after_atomic(); ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; } else if (e->add_mac) { /* We can store MAC too */ ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return 0; } elem->filled = MAC_UNSET; /* MAC is not stored yet, don't start timer */ return IPSET_ADD_STORE_PLAIN_TIMEOUT; } static int bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, struct bitmap_ipmac *map) { return !test_and_clear_bit(e->id, map->members); } static int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = get_const_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || (elem->filled == MAC_FILLED && nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether)); } static int bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, 
htonl(map->last_ip)); } static int bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; /* Backward compatibility: we don't check the second flag */ if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; e.id = ip_to_id(map, ip); if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); if (is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; e.id = ip_to_id(map, ip); if (tb[IPSET_ATTR_ETHER]) { if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN) return -IPSET_ERR_PROTOCOL; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); e.add_mac = 1; } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } static bool bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) { const struct bitmap_ipmac *x = a->data; const struct bitmap_ipmac *y = b->data; return x->first_ip == y->first_ip && x->last_ip == y->last_ip && a->timeout == b->timeout && a->extensions == b->extensions; } /* Plain variant */ #include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; set->timeout = IPSET_NO_TIMEOUT; map->set = set; set->data = map; set->family = NFPROTO_IPV4; return true; } static int bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); if (ret) return ret; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); if (ret) return ret; if (first_ip > last_ip) swap(first_ip, last_ip); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); } else { return -IPSET_ERR_PROTOCOL; } elements = (u64)last_ip - first_ip + 1; if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; set->dsize = ip_set_elem_len(set, tb, sizeof(struct bitmap_ipmac_elem), __alignof__(struct bitmap_ipmac_elem)); map = ip_set_alloc(sizeof(*map) + elements * set->dsize); if (!map) return -ENOMEM; map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } return 0; } static struct ip_set_type bitmap_ipmac_type = { .name = "bitmap:ip,mac", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init bitmap_ipmac_init(void) { return ip_set_type_register(&bitmap_ipmac_type); } static void __exit bitmap_ipmac_fini(void) { rcu_barrier(); ip_set_type_unregister(&bitmap_ipmac_type); } 
module_init(bitmap_ipmac_init); module_exit(bitmap_ipmac_fini);
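As a reading aid for the range-to-bit mapping this set type uses throughout (ip_to_id() plus test_bit/set_bit on map->members), here is a standalone user-space sketch. The fixed /24 range, the parse_ip() helper, and the struct layout are illustrative assumptions; it deliberately models neither the MAC extension nor timeouts.

/* Sketch: membership bitmap over an inclusive IPv4 range, as in bitmap:ip,mac. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define RANGE_BITS 256			/* one /24 worth of addresses */
#define LONG_BITS  (8 * sizeof(unsigned long))

struct ip_bitmap {
	uint32_t first_ip;		/* host byte order, included in range */
	uint32_t last_ip;		/* host byte order, included in range */
	unsigned long bits[RANGE_BITS / LONG_BITS];
};

/* Same arithmetic as the kernel's ip_to_id(): offset from the range start. */
static int ip_to_id(const struct ip_bitmap *m, uint32_t ip)
{
	return ip - m->first_ip;
}

static int in_range(const struct ip_bitmap *m, uint32_t ip)
{
	return ip >= m->first_ip && ip <= m->last_ip;
}

static void set_ip(struct ip_bitmap *m, uint32_t ip)
{
	int id = ip_to_id(m, ip);

	m->bits[id / LONG_BITS] |= 1UL << (id % LONG_BITS);
}

static int test_ip(const struct ip_bitmap *m, uint32_t ip)
{
	int id = ip_to_id(m, ip);

	return !!(m->bits[id / LONG_BITS] & (1UL << (id % LONG_BITS)));
}

/* Parse dotted quad into host byte order, mirroring how the set stores IPs. */
static uint32_t parse_ip(const char *s)
{
	struct in_addr a;

	inet_pton(AF_INET, s, &a);
	return ntohl(a.s_addr);
}

int main(void)
{
	struct ip_bitmap m = {
		.first_ip = parse_ip("192.168.1.0"),
		.last_ip  = parse_ip("192.168.1.255"),
	};
	const char *probe[] = { "192.168.1.7", "192.168.1.8" };
	int i;

	set_ip(&m, parse_ip("192.168.1.7"));
	for (i = 0; i < 2; i++) {
		uint32_t ip = parse_ip(probe[i]);

		printf("%s: %s\n", probe[i],
		       in_range(&m, ip) && test_ip(&m, ip) ? "member" : "absent");
	}
	return 0;
}

The range check before the bit operation corresponds to the -IPSET_ERR_BITMAP_RANGE paths in bitmap_ipmac_kadt()/bitmap_ipmac_uadt() above.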
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ /* * jfs_txnmgr.c: transaction manager * * notes: * transaction starts with txBegin() and ends with txCommit() * or txAbort(). * * tlock is acquired at the time of update; * (obviate scan at commit time for xtree and dtree) * tlock and mp point to each other; * (no hashlist for mp -> tlock). * * special cases: * tlock on in-memory inode: * in-place tlock in the in-memory inode itself; * converted to page lock by iWrite() at commit time. * * tlock during write()/mmap() under anonymous transaction (tid = 0): * transferred (?) to transaction at commit time. * * use the page itself to update allocation maps * (obviate intermediate replication of allocation/deallocation data) * hold on to mp+lock thru update of maps */ #include <linux/fs.h> #include <linux/vmalloc.h> #include <linux/completion.h> #include <linux/freezer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kthread.h> #include <linux/seq_file.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_metapage.h" #include "jfs_dinode.h" #include "jfs_imap.h" #include "jfs_dmap.h" #include "jfs_superblock.h" #include "jfs_debug.h" /* * transaction management structures */ static struct { int freetid; /* index of a free tid structure */ int freelock; /* index of first free lock word */ wait_queue_head_t freewait; /* eventlist of free tblock */ wait_queue_head_t freelockwait; /* eventlist of free tlock */ wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ int tlocksInUse; /* Number of tlocks in use */ spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ /* struct tblock *sync_queue; * Transactions waiting for data sync */ struct list_head unlock_queue; /* Txns waiting to be released */ struct list_head anon_list; /* inodes having anonymous txns */ struct list_head anon_list2; /* inodes having anonymous txns that couldn't be sync'ed */ } TxAnchor; int jfs_tlocks_low; /* Indicates low number of available tlocks */ #ifdef CONFIG_JFS_STATISTICS static struct { uint txBegin; uint txBegin_barrier; uint txBegin_lockslow; uint txBegin_freetid; uint txBeginAnon; uint txBeginAnon_barrier; uint txBeginAnon_lockslow; uint txLockAlloc; uint txLockAlloc_freelock; } TxStat; #endif static int nTxBlock = -1; /* number of transaction blocks */ module_param(nTxBlock, int, 0); MODULE_PARM_DESC(nTxBlock, "Number of transaction blocks (max:65536)"); static int nTxLock = -1; /* number of transaction locks */ module_param(nTxLock, int, 0); MODULE_PARM_DESC(nTxLock, "Number of transaction locks (max:65536)"); struct tblock *TxBlock; /* transaction block table */ static int TxLockLWM; /* Low water mark for number of txLocks used */ static int TxLockHWM; /* High water mark for number of txLocks used */ static int TxLockVHWM; /* Very High water mark */ struct tlock *TxLock; /* transaction lock table */ /* * transaction management lock */ static DEFINE_SPINLOCK(jfsTxnLock); #define TXN_LOCK() spin_lock(&jfsTxnLock) #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock) #define 
LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); static int jfs_commit_thread_waking; /* * Retry logic exists outside these macros to protect from spurious wakeups. */ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(event, &wait); set_current_state(TASK_UNINTERRUPTIBLE); TXN_UNLOCK(); io_schedule(); remove_wait_queue(event, &wait); } #define TXN_SLEEP(event)\ {\ TXN_SLEEP_DROP_LOCK(event);\ TXN_LOCK();\ } #define TXN_WAKEUP(event) wake_up_all(event) /* * statistics */ static struct { tid_t maxtid; /* 4: biggest tid ever used */ lid_t maxlid; /* 4: biggest lid ever used */ int ntid; /* 4: # of transactions performed */ int nlid; /* 4: # of tlocks acquired */ int waitlock; /* 4: # of tlock wait */ } stattx; /* * forward references */ static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck, struct commit *cd); static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck); static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void txAllocPMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk); static void txForce(struct tblock * tblk); static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd); static void txUpdateMap(struct tblock * tblk); static void txRelease(struct tblock * tblk); static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void LogSyncRelease(struct metapage * mp); /* * transaction block/lock management * --------------------------------- */ /* * Get a transaction lock from the free list. If the number in use is * greater than the high water mark, wake up the sync daemon. This should * free some anonymous transaction locks. (TXN_LOCK must be held.) 
*/ static lid_t txLockAlloc(void) { lid_t lid; INCREMENT(TxStat.txLockAlloc); if (!TxAnchor.freelock) { INCREMENT(TxStat.txLockAlloc_freelock); } while (!(lid = TxAnchor.freelock)) TXN_SLEEP(&TxAnchor.freelockwait); TxAnchor.freelock = TxLock[lid].next; HIGHWATERMARK(stattx.maxlid, lid); if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { jfs_info("txLockAlloc tlocks low"); jfs_tlocks_low = 1; wake_up_process(jfsSyncThread); } return lid; } static void txLockFree(lid_t lid) { TxLock[lid].tid = 0; TxLock[lid].next = TxAnchor.freelock; TxAnchor.freelock = lid; TxAnchor.tlocksInUse--; if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { jfs_info("txLockFree jfs_tlocks_low no more"); jfs_tlocks_low = 0; TXN_WAKEUP(&TxAnchor.lowlockwait); } TXN_WAKEUP(&TxAnchor.freelockwait); } /* * NAME: txInit() * * FUNCTION: initialize transaction management structures * * RETURN: * * serialization: single thread at jfs_init() */ int txInit(void) { int k, size; struct sysinfo si; /* Set defaults for nTxLock and nTxBlock if unset */ if (nTxLock == -1) { if (nTxBlock == -1) { /* Base default on memory size */ si_meminfo(&si); if (si.totalram > (256 * 1024)) /* 1 GB */ nTxLock = 64 * 1024; else nTxLock = si.totalram >> 2; } else if (nTxBlock > (8 * 1024)) nTxLock = 64 * 1024; else nTxLock = nTxBlock << 3; } if (nTxBlock == -1) nTxBlock = nTxLock >> 3; /* Verify tunable parameters */ if (nTxBlock < 16) nTxBlock = 16; /* No one should set it this low */ if (nTxBlock > 65536) nTxBlock = 65536; if (nTxLock < 256) nTxLock = 256; /* No one should set it this low */ if (nTxLock > 65536) nTxLock = 65536; printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", nTxBlock, nTxLock); /* * initialize transaction block (tblock) table * * transaction id (tid) = tblock index * tid = 0 is reserved. */ TxLockLWM = (nTxLock * 4) / 10; TxLockHWM = (nTxLock * 7) / 10; TxLockVHWM = (nTxLock * 8) / 10; size = sizeof(struct tblock) * nTxBlock; TxBlock = vmalloc(size); if (TxBlock == NULL) return -ENOMEM; for (k = 1; k < nTxBlock - 1; k++) { TxBlock[k].next = k + 1; init_waitqueue_head(&TxBlock[k].gcwait); init_waitqueue_head(&TxBlock[k].waitor); } TxBlock[k].next = 0; init_waitqueue_head(&TxBlock[k].gcwait); init_waitqueue_head(&TxBlock[k].waitor); TxAnchor.freetid = 1; init_waitqueue_head(&TxAnchor.freewait); stattx.maxtid = 1; /* statistics */ /* * initialize transaction lock (tlock) table * * transaction lock id = tlock index * tlock id = 0 is reserved. */ size = sizeof(struct tlock) * nTxLock; TxLock = vmalloc(size); if (TxLock == NULL) { vfree(TxBlock); return -ENOMEM; } /* initialize tlock table */ for (k = 1; k < nTxLock - 1; k++) TxLock[k].next = k + 1; TxLock[k].next = 0; init_waitqueue_head(&TxAnchor.freelockwait); init_waitqueue_head(&TxAnchor.lowlockwait); TxAnchor.freelock = 1; TxAnchor.tlocksInUse = 0; INIT_LIST_HEAD(&TxAnchor.anon_list); INIT_LIST_HEAD(&TxAnchor.anon_list2); LAZY_LOCK_INIT(); INIT_LIST_HEAD(&TxAnchor.unlock_queue); stattx.maxlid = 1; /* statistics */ return 0; } /* * NAME: txExit() * * FUNCTION: clean up when module is unloaded */ void txExit(void) { vfree(TxLock); TxLock = NULL; vfree(TxBlock); TxBlock = NULL; } /* * NAME: txBegin() * * FUNCTION: start a transaction. 
* * PARAMETER: sb - superblock * flag - force for nested tx; * * RETURN: tid - transaction id * * note: the force flag allows starting a tx for a nested tx, * to prevent deadlock on the logsync barrier; */ tid_t txBegin(struct super_block *sb, int flag) { tid_t t; struct tblock *tblk; struct jfs_log *log; jfs_info("txBegin: flag = 0x%x", flag); log = JFS_SBI(sb)->log; if (!log) { jfs_error(sb, "read-only filesystem\n"); return 0; } TXN_LOCK(); INCREMENT(TxStat.txBegin); retry: if (!(flag & COMMIT_FORCE)) { /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag) || test_bit(log_QUIESCE, &log->flag)) { INCREMENT(TxStat.txBegin_barrier); TXN_SLEEP(&log->syncwait); goto retry; } } if (flag == 0) { /* * Don't begin transaction if we're getting starved for tlocks * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately * free tlocks) */ if (TxAnchor.tlocksInUse > TxLockVHWM) { INCREMENT(TxStat.txBegin_lockslow); TXN_SLEEP(&TxAnchor.lowlockwait); goto retry; } } /* * allocate transaction id/block */ if ((t = TxAnchor.freetid) == 0) { jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); goto retry; } tblk = tid_to_tblock(t); if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { /* Don't let a non-forced transaction take the last tblk */ jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); goto retry; } TxAnchor.freetid = tblk->next; /* * initialize transaction */ /* * We can't zero the whole thing or we screw up another thread being * awakened after sleeping on tblk->waitor * * memset(tblk, 0, sizeof(struct tblock)); */ tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; tblk->sb = sb; ++log->logtid; tblk->logtid = log->logtid; ++log->active; HIGHWATERMARK(stattx.maxtid, t); /* statistics */ INCREMENT(stattx.ntid); /* statistics */ TXN_UNLOCK(); jfs_info("txBegin: returning tid = %d", t); return t; } /* * NAME: txBeginAnon() * * FUNCTION: start an anonymous transaction. * Blocks if the logsync barrier is set or available tlocks are * low, to prevent anonymous tlocks from depleting the supply. * * PARAMETER: sb - superblock * * RETURN: none */ void txBeginAnon(struct super_block *sb) { struct jfs_log *log; log = JFS_SBI(sb)->log; TXN_LOCK(); INCREMENT(TxStat.txBeginAnon); retry: /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag) || test_bit(log_QUIESCE, &log->flag)) { INCREMENT(TxStat.txBeginAnon_barrier); TXN_SLEEP(&log->syncwait); goto retry; } /* * Don't begin transaction if we're getting starved for tlocks */ if (TxAnchor.tlocksInUse > TxLockVHWM) { INCREMENT(TxStat.txBeginAnon_lockslow); TXN_SLEEP(&TxAnchor.lowlockwait); goto retry; } TXN_UNLOCK(); } /* * txEnd() * * function: free specified transaction block. * * logsync barrier processing: * * serialization: */ void txEnd(tid_t tid) { struct tblock *tblk = tid_to_tblock(tid); struct jfs_log *log; jfs_info("txEnd: tid = %d", tid); TXN_LOCK(); /* * wake up transactions waiting on the page locked * by the current transaction */ TXN_WAKEUP(&tblk->waitor); log = JFS_SBI(tblk->sb)->log; /* * Lazy commit thread can't free this guy until we mark it UNLOCKED; * otherwise, we would be left with a transaction that may have been * reused. * * Lazy commit thread will turn off tblkGC_LAZY before calling this * routine. 
*/ if (tblk->flag & tblkGC_LAZY) { jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); TXN_UNLOCK(); spin_lock_irq(&log->gclock); // LOGGC_LOCK tblk->flag |= tblkGC_UNLOCKED; spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK return; } jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); assert(tblk->next == 0); /* * insert tblock back on freelist */ tblk->next = TxAnchor.freetid; TxAnchor.freetid = tid; /* * mark the tblock not active */ if (--log->active == 0) { clear_bit(log_FLUSH, &log->flag); /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag)) { TXN_UNLOCK(); /* write dirty metadata & forward log syncpt */ jfs_syncpt(log, 1); jfs_info("log barrier off: 0x%x", log->lsn); /* enable new transactions to start */ clear_bit(log_SYNCBARRIER, &log->flag); /* wake up all waiters for the logsync barrier */ TXN_WAKEUP(&log->syncwait); goto wakeup; } } TXN_UNLOCK(); wakeup: /* * wake up all waiters for a free tblock */ TXN_WAKEUP(&TxAnchor.freewait); } /* * txLock() * * function: acquire a transaction lock on the specified <mp> * * parameter: * * return: transaction lock id * * serialization: */ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, int type) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); int dir_xtree = 0; lid_t lid; tid_t xtid; struct tlock *tlck; struct xtlock *xtlck; struct linelock *linelock; xtpage_t *p; struct tblock *tblk; TXN_LOCK(); if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && !(mp->xflag & COMMIT_PAGE)) { /* * Directory inode is special. It can have both an xtree tlock * and a dtree tlock associated with it. */ dir_xtree = 1; lid = jfs_ip->xtlid; } else lid = mp->lid; /* is page not locked by a transaction ? */ if (lid == 0) goto allocateLock; jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); /* is page locked by the requester transaction ? */ tlck = lid_to_tlock(lid); if ((xtid = tlck->tid) == tid) { TXN_UNLOCK(); goto grantLock; } /* * is page locked by anonymous transaction/lock ? * * (page update without transaction (i.e., file write) is * locked under anonymous transaction tid = 0: * anonymous tlocks maintained on anonymous tlock list of * the inode of the page and available to all anonymous * transactions until txCommit() time at which point * they are transferred to the transaction tlock list of * the committing transaction of the inode) */ if (xtid == 0) { tlck->tid = tid; TXN_UNLOCK(); tblk = tid_to_tblock(tid); /* * The order of the tlocks in the transaction is important * (during truncate, child xtree pages must be freed before * parent's tlocks change the working map). * Take tlock off anonymous list and add to tail of * transaction list * * Note: We really need to get rid of the tid & lid and * use list_head's. This code is getting UGLY! */ if (jfs_ip->atlhead == lid) { if (jfs_ip->atltail == lid) { /* only anonymous txn. 
* Remove from anon_list */ TXN_LOCK(); list_del_init(&jfs_ip->anon_inode_list); TXN_UNLOCK(); } jfs_ip->atlhead = tlck->next; } else { lid_t last; for (last = jfs_ip->atlhead; lid_to_tlock(last)->next != lid; last = lid_to_tlock(last)->next) { assert(last); } lid_to_tlock(last)->next = tlck->next; if (jfs_ip->atltail == lid) jfs_ip->atltail = last; } /* insert the tlock at tail of transaction tlock list */ if (tblk->next) lid_to_tlock(tblk->last)->next = lid; else tblk->next = lid; tlck->next = 0; tblk->last = lid; goto grantLock; } goto waitLock; /* * allocate a tlock */ allocateLock: lid = txLockAlloc(); tlck = lid_to_tlock(lid); /* * initialize tlock */ tlck->tid = tid; TXN_UNLOCK(); /* mark tlock for meta-data page */ if (mp->xflag & COMMIT_PAGE) { tlck->flag = tlckPAGELOCK; /* mark the page dirty and nohomeok */ metapage_nohomeok(mp); jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", mp, mp->nohomeok, tid, tlck); /* if anonymous transaction, and buffer is on the group * commit synclist, mark inode to show this. This will * prevent the buffer from being marked nohomeok for too * long a time. */ if ((tid == 0) && mp->lsn) set_cflag(COMMIT_Synclist, ip); } /* mark tlock for in-memory inode */ else tlck->flag = tlckINODELOCK; if (S_ISDIR(ip->i_mode)) tlck->flag |= tlckDIRECTORY; tlck->type = 0; /* bind the tlock and the page */ tlck->ip = ip; tlck->mp = mp; if (dir_xtree) jfs_ip->xtlid = lid; else mp->lid = lid; /* * enqueue transaction lock to transaction/inode */ /* insert the tlock at tail of transaction tlock list */ if (tid) { tblk = tid_to_tblock(tid); if (tblk->next) lid_to_tlock(tblk->last)->next = lid; else tblk->next = lid; tlck->next = 0; tblk->last = lid; } /* anonymous transaction: * insert the tlock at head of inode anonymous tlock list */ else { tlck->next = jfs_ip->atlhead; jfs_ip->atlhead = lid; if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; TXN_LOCK(); list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); TXN_UNLOCK(); } } /* initialize type dependent area for linelock */ linelock = (struct linelock *) & tlck->lock; linelock->next = 0; linelock->flag = tlckLINELOCK; linelock->maxcnt = TLOCKSHORT; linelock->index = 0; switch (type & tlckTYPE) { case tlckDTREE: linelock->l2linesize = L2DTSLOTSIZE; break; case tlckXTREE: linelock->l2linesize = L2XTSLOTSIZE; xtlck = (struct xtlock *) linelock; xtlck->header.offset = 0; xtlck->header.length = 2; if (type & tlckNEW) { xtlck->lwm.offset = XTENTRYSTART; } else { if (mp->xflag & COMMIT_PAGE) p = (xtpage_t *) mp->data; else p = (xtpage_t *) &jfs_ip->i_xtroot; xtlck->lwm.offset = le16_to_cpu(p->header.nextindex); } xtlck->lwm.length = 0; /* ! 
*/ xtlck->twm.offset = 0; xtlck->hwm.offset = 0; xtlck->index = 2; break; case tlckINODE: linelock->l2linesize = L2INODESLOTSIZE; break; case tlckDATA: linelock->l2linesize = L2DATASLOTSIZE; break; default: jfs_err("UFO tlock:0x%p", tlck); } /* * update tlock vector */ grantLock: tlck->type |= type; return tlck; /* * page is being locked by another transaction: */ waitLock: /* Only locks on ipimap or ipaimap should reach here */ /* assert(jfs_ip->fileset == AGGREGATE_I); */ if (jfs_ip->fileset != AGGREGATE_I) { printk(KERN_ERR "txLock: trying to lock locked page!"); print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, ip, sizeof(*ip), 0); print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, mp, sizeof(*mp), 0); print_hex_dump(KERN_ERR, "Locker's tblock: ", DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), sizeof(struct tblock), 0); print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, tlck, sizeof(*tlck), 0); BUG(); } INCREMENT(stattx.waitlock); /* statistics */ TXN_UNLOCK(); release_metapage(mp); TXN_LOCK(); xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); /* Recheck everything since dropping TXN_LOCK */ if (xtid && (tlck->mp == mp) && (mp->lid == lid)) TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); else TXN_UNLOCK(); jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); return NULL; } /* * NAME: txRelease() * * FUNCTION: Release buffers associated with transaction locks, but don't * mark homeok yet. This allows other transactions to modify * buffers, but won't let them go to disk until the commit record * actually gets written. * * PARAMETER: * tblk - * * RETURN: Errors from subroutines. */ static void txRelease(struct tblock * tblk) { struct metapage *mp; lid_t lid; struct tlock *tlck; TXN_LOCK(); for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); mp->lid = 0; } } /* * wake up transactions waiting on a page locked * by the current transaction */ TXN_WAKEUP(&tblk->waitor); TXN_UNLOCK(); } /* * NAME: txUnlock() * * FUNCTION: Initiates pageout of pages modified by tid in journalled * objects and frees their lockwords. 
*/ static void txUnlock(struct tblock * tblk) { struct tlock *tlck; struct linelock *linelock; lid_t lid, next, llid, k; struct metapage *mp; struct jfs_log *log; int difft, diffp; unsigned long flags; jfs_info("txUnlock: tblk = 0x%p", tblk); log = JFS_SBI(tblk->sb)->log; /* * mark page under tlock homeok (its log has been written): */ for (lid = tblk->next; lid; lid = next) { tlck = lid_to_tlock(lid); next = tlck->next; jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); /* unbind page from tlock */ if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); /* hold buffer */ hold_metapage(mp); assert(mp->nohomeok > 0); _metapage_homeok(mp); /* inherit younger/larger clsn */ LOGSYNC_LOCK(log, flags); if (mp->clsn) { logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; } else mp->clsn = tblk->clsn; LOGSYNC_UNLOCK(log, flags); assert(!(tlck->flag & tlckFREEPAGE)); put_metapage(mp); } /* insert tlock, and linelock(s) of the tlock if any, * at head of freelist */ TXN_LOCK(); llid = ((struct linelock *) & tlck->lock)->next; while (llid) { linelock = (struct linelock *) lid_to_tlock(llid); k = linelock->next; txLockFree(llid); llid = k; } txLockFree(lid); TXN_UNLOCK(); } tblk->next = tblk->last = 0; /* * remove tblock from logsynclist * (allocation map pages inherited lsn of tblk and * has been inserted in logsync list at txUpdateMap()) */ if (tblk->lsn) { LOGSYNC_LOCK(log, flags); log->count--; list_del(&tblk->synclist); LOGSYNC_UNLOCK(log, flags); } } /* * txMaplock() * * function: allocate a transaction lock for freed page/entry; * for freed page, maplock is used as xtlock/dtlock type; */ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); lid_t lid; struct tblock *tblk; struct tlock *tlck; struct maplock *maplock; TXN_LOCK(); /* * allocate a tlock */ lid = txLockAlloc(); tlck = lid_to_tlock(lid); /* * initialize tlock */ tlck->tid = tid; /* bind the tlock and the object */ tlck->flag = tlckINODELOCK; if (S_ISDIR(ip->i_mode)) tlck->flag |= tlckDIRECTORY; tlck->ip = ip; tlck->mp = NULL; tlck->type = type; /* * enqueue transaction lock to transaction/inode */ /* insert the tlock at tail of transaction tlock list */ if (tid) { tblk = tid_to_tblock(tid); if (tblk->next) lid_to_tlock(tblk->last)->next = lid; else tblk->next = lid; tlck->next = 0; tblk->last = lid; } /* anonymous transaction: * insert the tlock at head of inode anonymous tlock list */ else { tlck->next = jfs_ip->atlhead; jfs_ip->atlhead = lid; if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); } } TXN_UNLOCK(); /* initialize type dependent area for maplock */ maplock = (struct maplock *) & tlck->lock; maplock->next = 0; maplock->maxcnt = 0; maplock->index = 0; return tlck; } /* * txLinelock() * * function: allocate a transaction lock for log vector list */ struct linelock *txLinelock(struct linelock * tlock) { lid_t lid; struct tlock *tlck; struct linelock *linelock; TXN_LOCK(); /* allocate a TxLock structure */ lid = txLockAlloc(); tlck = lid_to_tlock(lid); TXN_UNLOCK(); /* initialize linelock */ linelock = (struct linelock *) tlck; linelock->next = 0; linelock->flag = tlckLINELOCK; linelock->maxcnt = TLOCKLONG; linelock->index = 0; if (tlck->flag & tlckDIRECTORY) linelock->flag |= tlckDIRECTORY; /* append linelock after tlock */ linelock->next = tlock->next; tlock->next = lid; 
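	/* the overflow linelock now sits immediately after the lock it
	 * extends; txUnlock() later frees the whole chain by following
	 * linelock->next
	 */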
return linelock; } /* * transaction commit management * ----------------------------- */ /* * NAME: txCommit() * * FUNCTION: commit the changes to the objects specified in * clist. For journalled segments only the * changes of the caller are committed, ie by tid. * for non-journalled segments the data are flushed to * disk and then the change to the disk inode and indirect * blocks committed (so blocks newly allocated to the * segment will be made a part of the segment atomically). * * all of the segments specified in clist must be in * one file system. no more than 6 segments are needed * to handle all unix svcs. * * if the i_nlink field (i.e. disk inode link count) * is zero, and the type of inode is a regular file or * directory, or symbolic link , the inode is truncated * to zero length. the truncation is committed but the * VM resources are unaffected until it is closed (see * iput and iclose). * * PARAMETER: * * RETURN: * * serialization: * on entry the inode lock on each segment is assumed * to be held. * * i/o error: */ int txCommit(tid_t tid, /* transaction identifier */ int nip, /* number of inodes to commit */ struct inode **iplist, /* list of inode to commit */ int flag) { int rc = 0; struct commit cd; struct jfs_log *log; struct tblock *tblk; struct lrd *lrd; struct inode *ip; struct jfs_inode_info *jfs_ip; int k, n; ino_t top; struct super_block *sb; jfs_info("txCommit, tid = %d, flag = %d", tid, flag); /* is read-only file system ? */ if (isReadOnly(iplist[0])) { rc = -EROFS; goto TheEnd; } sb = cd.sb = iplist[0]->i_sb; cd.tid = tid; if (tid == 0) tid = txBegin(sb, 0); tblk = tid_to_tblock(tid); /* * initialize commit structure */ log = JFS_SBI(sb)->log; cd.log = log; /* initialize log record descriptor in commit */ lrd = &cd.lrd; lrd->logtid = cpu_to_le32(tblk->logtid); lrd->backchain = 0; tblk->xflag |= flag; if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) tblk->xflag |= COMMIT_LAZY; /* * prepare non-journaled objects for commit * * flush data pages of non-journaled file * to prevent the file getting non-initialized disk blocks * in case of crash. * (new blocks - ) */ cd.iplist = iplist; cd.nip = nip; /* * acquire transaction lock on (on-disk) inodes * * update on-disk inode from in-memory inode * acquiring transaction locks for AFTER records * on the on-disk inode of file object * * sort the inodes array by inode number in descending order * to prevent deadlock when acquiring transaction lock * of on-disk inodes on multiple on-disk inode pages by * multiple concurrent transactions */ for (k = 0; k < cd.nip; k++) { top = (cd.iplist[k])->i_ino; for (n = k + 1; n < cd.nip; n++) { ip = cd.iplist[n]; if (ip->i_ino > top) { top = ip->i_ino; cd.iplist[n] = cd.iplist[k]; cd.iplist[k] = ip; } } ip = cd.iplist[k]; jfs_ip = JFS_IP(ip); /* * BUGBUG - This code has temporarily been removed. The * intent is to ensure that any file data is written before * the metadata is committed to the journal. This prevents * uninitialized data from appearing in a file after the * journal has been replayed. (The uninitialized data * could be sensitive data removed by another user.) * * The problem now is that we are holding the IWRITELOCK * on the inode, and calling filemap_fdatawrite on an * unmapped page will cause a deadlock in jfs_get_block. * * The long term solution is to pare down the use of * IWRITELOCK. We are currently holding it too long. 
* We could also be smarter about which data pages need * to be written before the transaction is committed and * when we don't need to worry about it at all. * * if ((!S_ISDIR(ip->i_mode)) * && (tblk->flag & COMMIT_DELETE) == 0) * filemap_write_and_wait(ip->i_mapping); */ /* * Mark inode as not dirty. It will still be on the dirty * inode list, but we'll know not to commit it again unless * it gets marked dirty again */ clear_cflag(COMMIT_Dirty, ip); /* inherit anonymous tlock(s) of inode */ if (jfs_ip->atlhead) { lid_to_tlock(jfs_ip->atltail)->next = tblk->next; tblk->next = jfs_ip->atlhead; if (!tblk->last) tblk->last = jfs_ip->atltail; jfs_ip->atlhead = jfs_ip->atltail = 0; TXN_LOCK(); list_del_init(&jfs_ip->anon_inode_list); TXN_UNLOCK(); } /* * acquire transaction lock on on-disk inode page * (become first tlock of the tblk's tlock list) */ if (((rc = diWrite(tid, ip)))) goto out; } /* * write log records from transaction locks * * txUpdateMap() resets XAD_NEW in XAD. */ txLog(log, tblk, &cd); /* * Ensure that inode isn't reused before * lazy commit thread finishes processing */ if (tblk->xflag & COMMIT_DELETE) { ihold(tblk->u.ip); /* * Avoid a rare deadlock * * If the inode is locked, we may be blocked in * jfs_commit_inode. If so, we don't want the * lazy_commit thread doing the last iput() on the inode * since that may block on the locked inode. Instead, * commit the transaction synchronously, so the last iput * will be done by the calling thread (or later) */ /* * I believe this code is no longer needed. Splitting I_LOCK * into two bits, I_NEW and I_SYNC should prevent this * deadlock as well. But since I don't have a JFS testload * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. * Joern */ if (tblk->u.ip->i_state & I_SYNC) tblk->xflag &= ~COMMIT_LAZY; } ASSERT((!(tblk->xflag & COMMIT_DELETE)) || ((tblk->u.ip->i_nlink == 0) && !test_cflag(COMMIT_Nolink, tblk->u.ip))); /* * write COMMIT log record */ lrd->type = cpu_to_le16(LOG_COMMIT); lrd->length = 0; lmLog(log, tblk, lrd, NULL); lmGroupCommit(log, tblk); /* * - transaction is now committed - */ /* * force pages in careful update * (imap addressing structure update) */ if (flag & COMMIT_FORCE) txForce(tblk); /* * update allocation map. * * update inode allocation map and inode: * free pager lock on memory object of inode if any. * update block allocation map. * * txUpdateMap() resets XAD_NEW in XAD. */ if (tblk->xflag & COMMIT_FORCE) txUpdateMap(tblk); /* * free transaction locks and pageout/free pages */ txRelease(tblk); if ((tblk->flag & tblkGC_LAZY) == 0) txUnlock(tblk); /* * reset in-memory object state */ for (k = 0; k < cd.nip; k++) { ip = cd.iplist[k]; jfs_ip = JFS_IP(ip); /* * reset in-memory inode state */ jfs_ip->bxflag = 0; jfs_ip->blid = 0; } out: if (rc != 0) txAbort(tid, 1); TheEnd: jfs_info("txCommit: tid = %d, returning %d", tid, rc); return rc; } /* * NAME: txLog() * * FUNCTION: Writes AFTER log records for all lines modified * by tid for segments specified by inodes in comdata. * Code assumes only WRITELOCKS are recorded in lockwords. 
* * PARAMETERS: * * RETURN : */ static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd) { struct inode *ip; lid_t lid; struct tlock *tlck; struct lrd *lrd = &cd->lrd; /* * write log record(s) for each tlock of transaction, */ for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); tlck->flag |= tlckLOG; /* initialize lrd common */ ip = tlck->ip; lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); /* write log record of page from the tlock */ switch (tlck->type & tlckTYPE) { case tlckXTREE: xtLog(log, tblk, lrd, tlck); break; case tlckDTREE: dtLog(log, tblk, lrd, tlck); break; case tlckINODE: diLog(log, tblk, lrd, tlck, cd); break; case tlckMAP: mapLog(log, tblk, lrd, tlck); break; case tlckDATA: dataLog(log, tblk, lrd, tlck); break; default: jfs_err("UFO tlock:0x%p", tlck); } } return; } /* * diLog() * * function: log inode tlock and format maplock to update bmap; */ static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck, struct commit *cd) { struct metapage *mp; pxd_t *pxd; struct pxd_lock *pxdlock; mp = tlck->mp; /* initialize as REDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_INODE); lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); pxd = &lrd->log.redopage.pxd; /* * inode after image */ if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } else if (tlck->type & tlckFREE) { /* * free inode extent * * (pages of the freed inode extent have been invalidated and * a maplock for free of the extent has been formatted at * txLock() time); * * the tlock had been acquired on the inode allocation map page * (iag) that specifies the freed extent, even though the map * page is not itself logged, to prevent pageout of the map * page before the log; */ /* log LOG_NOREDOINOEXT of the freed inode extent for * logredo() to start NoRedoPage filters, and to update * imap and bmap for free of the extent; */ lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); /* * For the LOG_NOREDOINOEXT record, we need * to pass the IAG number and inode extent * index (within that IAG) from which the * extent is being released. These have been * passed to us in the iplist[1] and iplist[2]. 
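 * (They are not real inode pointers in this case: the values are
 * smuggled through the iplist array as raw integers, which is why
 * they are recovered with (u32) (size_t) casts below.)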
*/ lrd->log.noredoinoext.iagnum = cpu_to_le32((u32) (size_t) cd->iplist[1]); lrd->log.noredoinoext.inoext_idx = cpu_to_le32((u32) (size_t) cd->iplist[2]); pxdlock = (struct pxd_lock *) & tlck->lock; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* update bmap */ tlck->flag |= tlckUPDATEMAP; /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } else jfs_err("diLog: UFO type tlck:0x%p", tlck); return; } /* * dataLog() * * function: log data tlock */ static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck) { struct metapage *mp; pxd_t *pxd; mp = tlck->mp; /* initialize as REDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_DATA); lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); pxd = &lrd->log.redopage.pxd; /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); if (jfs_dirtable_inline(tlck->ip)) { /* * The table has been truncated, we've must have deleted * the last entry, so don't bother logging this */ mp->lid = 0; grab_metapage(mp); metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; return; } PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * dtLog() * * function: log dtree tlock and format maplock to update bmap; */ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct metapage *mp; struct pxd_lock *pxdlock; pxd_t *pxd; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); /* * page extension via relocation: entry insertion; * page extension in-place: entry insertion; * new right page from page split, reinitialized in-line * root from root page split: entry insertion; */ if (tlck->type & (tlckNEW | tlckEXTEND)) { /* log after-image of the new page for logredo(): * mark log (LOG_NEW) for logredo() to initialize * freelist and update bmap for alloc of the new page; */ lrd->type = cpu_to_le16(LOG_REDOPAGE); if (tlck->type & tlckEXTEND) lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); else lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bPMAP for * alloc of the new page; */ if (tlck->type & tlckBTROOT) return; tlck->flag |= tlckUPDATEMAP; pxdlock = (struct pxd_lock *) & tlck->lock; pxdlock->flag = mlckALLOCPXD; pxdlock->pxd = *pxd; pxdlock->index = 1; /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * entry insertion/deletion, * sibling page link update (old right page before split); */ if (tlck->type & (tlckENTRY | tlckRELINK)) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * page deletion: page has been invalidated * page relocation: source extent * * a maplock for free of the page has been formatted * at txLock() 
time); */ if (tlck->type & (tlckFREE | tlckRELOCATE)) { /* log LOG_NOREDOPAGE of the deleted page for logredo() * to start NoRedoPage filter and to update bmap for free * of the deleted page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); pxdlock = (struct pxd_lock *) & tlck->lock; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time; */ tlck->flag |= tlckUPDATEMAP; } return; } /* * xtLog() * * function: log xtree tlock and format maplock to update bmap; */ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct inode *ip; struct metapage *mp; xtpage_t *p; struct xtlock *xtlck; struct maplock *maplock; struct xdlistlock *xadlock; struct pxd_lock *pxdlock; pxd_t *page_pxd; int next, lwm, hwm; ip = tlck->ip; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); page_pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) { lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); p = (xtpage_t *) &JFS_IP(ip)->i_xtroot; if (S_ISDIR(ip->i_mode)) lrd->log.redopage.type |= cpu_to_le16(LOG_DIR_XTREE); } else p = (xtpage_t *) mp->data; next = le16_to_cpu(p->header.nextindex); xtlck = (struct xtlock *) & tlck->lock; maplock = (struct maplock *) & tlck->lock; xadlock = (struct xdlistlock *) maplock; /* * entry insertion/extension; * sibling page link update (old right page before split); */ if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { /* log after-image for logredo(): * logredo() will update bmap for alloc of new/extended * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from * after-image of XADlist; * logredo() resets (XAD_NEW|XAD_EXTEND) flag when * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bPMAP * for alloc of new/extended extents of XAD[lwm:next) * from the page itself; * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. */ lwm = xtlck->lwm.offset; if (lwm == 0) lwm = XTPAGEMAXSLOT; if (lwm == next) goto out; if (lwm > next) { jfs_err("xtLog: lwm > next"); goto out; } tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. * * We can fit twice as many pxd's as xads in the lock */ xadlock->flag = mlckALLOCPXDLIST; pxd = xadlock->xdlist = &xtlck->pxdlock; for (i = 0; i < xadlock->count; i++) { PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); p->xad[lwm + i].flag &= ~(XAD_NEW | XAD_EXTENDED); pxd++; } } else { /* * xdlist will point into the inode's xtree, ensure * that transaction is not committed lazily. */ xadlock->flag = mlckALLOCXADLIST; xadlock->xdlist = &p->xad[lwm]; tblk->xflag &= ~COMMIT_LAZY; } jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); maplock->index = 1; out: /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * page deletion: file deletion/truncation (ref.
xtTruncate()) * * (page will be invalidated after log is written and bmap * is updated from the page); */ if (tlck->type & tlckFREE) { /* LOG_NOREDOPAGE log for NoRedoPage filter: * if page free from file delete, NoRedoFile filter from * inode image of zero link count will subsume NoRedoPage * filters for each page; * if page free from file truncation, write NoRedoPage * filter; * * update of block allocation map for the page itself: * if page free from deletion or truncation, LOG_UPDATEMAP * log for the page itself is generated from processing * its parent page xad entries; */ /* if page free from file truncation, log LOG_NOREDOPAGE * of the deleted page for logredo() to start NoRedoPage * filter for the page; */ if (tblk->xflag & COMMIT_TRUNCATE) { /* write NOREDOPAGE for the page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); if (tlck->type & tlckBTROOT) { /* Empty xtree must be logged */ lrd->type = cpu_to_le16(LOG_REDOPAGE); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); } } /* init LOG_UPDATEMAP of the freed extents * XAD[XTENTRYSTART:hwm) from the deleted page itself * for logredo() to update bmap; */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); xtlck = (struct xtlock *) & tlck->lock; hwm = xtlck->hwm.offset; lrd->log.updatemap.nxd = cpu_to_le16(hwm - XTENTRYSTART + 1); /* reformat linelock for lmLog() */ xtlck->header.offset = XTENTRYSTART; xtlck->header.length = hwm - XTENTRYSTART + 1; xtlck->index = 1; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bmap * to free extents of XAD[XTENTRYSTART:hwm) from the * deleted page itself; */ tlck->flag |= tlckUPDATEMAP; xadlock->count = hwm - XTENTRYSTART + 1; if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. * * We can fit twice as many pxd's as xads in the lock */ xadlock->flag = mlckFREEPXDLIST; pxd = xadlock->xdlist = &xtlck->pxdlock; for (i = 0; i < xadlock->count; i++) { PXDaddress(pxd, addressXAD(&p->xad[XTENTRYSTART + i])); PXDlength(pxd, lengthXAD(&p->xad[XTENTRYSTART + i])); pxd++; } } else { /* * xdlist will point into the inode's xtree, ensure * that transaction is not committed lazily. */ xadlock->flag = mlckFREEXADLIST; xadlock->xdlist = &p->xad[XTENTRYSTART]; tblk->xflag &= ~COMMIT_LAZY; } jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", tlck->ip, mp, xadlock->count); maplock->index = 1; /* mark page as invalid */ if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) && !(tlck->type & tlckBTROOT)) tlck->flag |= tlckFREEPAGE; /* else (tblk->xflag & COMMIT_PMAP) ? release the page; */ return; } /* * page/entry truncation: file truncation (ref. xtTruncate()) * * |----------+------+------+---------------| * | | | * | | hwm - hwm before truncation * | next - truncation point * lwm - lwm before truncation * header ? */ if (tlck->type & tlckTRUNCATE) { pxd_t pxd; /* truncated extent of xad */ int twm; /* * For truncation the entire linelock may be used, so it would * be difficult to store xad list in linelock itself. * Therefore, we'll just force transaction to be committed * synchronously, so that xtree pages won't be changed before * txUpdateMap runs.
*/ tblk->xflag &= ~COMMIT_LAZY; lwm = xtlck->lwm.offset; if (lwm == 0) lwm = XTPAGEMAXSLOT; hwm = xtlck->hwm.offset; twm = xtlck->twm.offset; /* * write log records */ /* log after-image for logredo(): * * logredo() will update bmap for alloc of new/extended * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from * after-image of XADlist; * logredo() resets (XAD_NEW|XAD_EXTEND) flag when * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { /* init LOG_UPDATEMAP for logredo() to update bmap for * free of truncated delta extent of the truncated * entry XAD[next - 1]: * (xtlck->pxdlock = truncated delta extent); */ pxdlock = (struct pxd_lock *) & xtlck->pxdlock; /* assert(pxdlock->type & tlckTRUNCATE); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; pxd = pxdlock->pxd; /* save to format maplock */ lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); } /* * free entries XAD[next:hwm]: */ if (hwm >= next) { /* init LOG_UPDATEMAP of the freed extents * XAD[next:hwm] from the deleted page itself * for logredo() to update bmap; */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); xtlck = (struct xtlock *) & tlck->lock; hwm = xtlck->hwm.offset; lrd->log.updatemap.nxd = cpu_to_le16(hwm - next + 1); /* reformat linelock for lmLog() */ xtlck->header.offset = next; xtlck->header.length = hwm - next + 1; xtlck->index = 1; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); } /* * format maplock(s) for txUpdateMap() to update bmap */ maplock->index = 0; /* * allocate entries XAD[lwm:next): */ if (lwm < next) { /* format a maplock for txUpdateMap() to update bPMAP * for alloc of new/extended extents of XAD[lwm:next) * from the page itself; * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 
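 * (XAD_NEW/XAD_EXTENDED mark extents that so far exist only in the
 * working map; txAllocPMap() pushes them into the persistent map
 * through dbUpdatePMap() and then clears both flags.)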
*/ tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; xadlock->xdlist = &p->xad[lwm]; jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d", tlck->ip, mp, xadlock->count, lwm, next); maplock->index++; xadlock++; } /* * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { /* format a maplock for txUpdateMap() to update bmap * to free truncated delta extent of the truncated * entry XAD[next - 1]; * (xtlck->pxdlock = truncated delta extent); */ tlck->flag |= tlckUPDATEMAP; pxdlock = (struct pxd_lock *) xadlock; pxdlock->flag = mlckFREEPXD; pxdlock->count = 1; pxdlock->pxd = pxd; jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d", ip, mp, pxdlock->count, hwm); maplock->index++; xadlock++; } /* * free entries XAD[next:hwm]: */ if (hwm >= next) { /* format a maplock for txUpdateMap() to update bmap * to free extents of XAD[next:hwm] from the deleted * page itself; */ tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckFREEXADLIST; xadlock->count = hwm - next + 1; xadlock->xdlist = &p->xad[next]; jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d", tlck->ip, mp, xadlock->count, next, hwm); maplock->index++; } /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } return; } /* * mapLog() * * function: log from maplock of freed data extents; */ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct pxd_lock *pxdlock; int i, nlock; pxd_t *pxd; /* * page relocation: free the source page extent * * a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time saving the src * relocated page address; */ if (tlck->type & tlckRELOCATE) { /* log LOG_NOREDOPAGE of the old relocated page * for logredo() to start NoRedoPage filter; */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); pxdlock = (struct pxd_lock *) & tlck->lock; pxd = &lrd->log.redopage.pxd; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* (N.B.
currently, logredo() does NOT update bmap * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); * if page free from relocation, LOG_UPDATEMAP log is * specifically generated now for logredo() * to update bmap for free of src relocated page; * (new flag LOG_RELOCATE may be introduced which will * inform logredo() to start NORedoPage filter and also * update block allocation map at the same time, thus * avoiding an extra log write); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time; */ tlck->flag |= tlckUPDATEMAP; return; } /* * Otherwise it's not a relocate request * */ else { /* log LOG_UPDATEMAP for logredo() to update bmap for * free of truncated/relocated delta extent of the data; * e.g.: external EA extent, relocated/truncated extent * from xtTailgate(); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); pxdlock = (struct pxd_lock *) & tlck->lock; nlock = pxdlock->index; for (i = 0; i < nlock; i++, pxdlock++) { if (pxdlock->flag & mlckALLOCPXD) lrd->log.updatemap.type = cpu_to_le16(LOG_ALLOCPXD); else lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", (ulong) addressPXD(&pxdlock->pxd), lengthPXD(&pxdlock->pxd)); } /* update bmap */ tlck->flag |= tlckUPDATEMAP; } } /* * txEA() * * function: acquire maplock for EA/ACL extents or * set COMMIT_INLINE flag; */ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) { struct tlock *tlck = NULL; struct pxd_lock *maplock = NULL, *pxdlock = NULL; /* * format maplock for alloc of new EA extent */ if (newea) { /* Since the newea could be a completely zeroed entry we need to * check for the two flags which indicate we should actually * commit new EA data */ if (newea->flag & DXD_EXTENT) { tlck = txMaplock(tid, ip, tlckMAP); maplock = (struct pxd_lock *) & tlck->lock; pxdlock = (struct pxd_lock *) maplock; pxdlock->flag = mlckALLOCPXD; PXDaddress(&pxdlock->pxd, addressDXD(newea)); PXDlength(&pxdlock->pxd, lengthDXD(newea)); pxdlock++; maplock->index = 1; } else if (newea->flag & DXD_INLINE) { tlck = NULL; set_cflag(COMMIT_Inlineea, ip); } } /* * format maplock for free of old EA extent */ if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { if (tlck == NULL) { tlck = txMaplock(tid, ip, tlckMAP); maplock = (struct pxd_lock *) & tlck->lock; pxdlock = (struct pxd_lock *) maplock; maplock->index = 0; } pxdlock->flag = mlckFREEPXD; PXDaddress(&pxdlock->pxd, addressDXD(oldea)); PXDlength(&pxdlock->pxd, lengthDXD(oldea)); maplock->index++; } } /* * txForce() * * function: synchronously write pages locked by transaction * after txLog() but before txUpdateMap(); */ static void txForce(struct tblock * tblk) { struct tlock *tlck; lid_t lid, next; struct metapage *mp; /* * reverse the order of transaction tlocks in * careful update order of address index pages * (right to left, bottom up) */ tlck = lid_to_tlock(tblk->next); lid = tlck->next; tlck->next = 0; while (lid) { tlck = lid_to_tlock(lid); next = tlck->next; tlck->next = tblk->next; tblk->next = lid; lid = next; } /* * synchronously write the page, and * hold the page for txUpdateMap(); */ for (lid = tblk->next; lid; lid = next) { 
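		/* the tlock list was just reversed above, so pages are
		 * forced out in careful-update order: right to left,
		 * bottom up
		 */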
tlck = lid_to_tlock(lid); next = tlck->next; if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); if (tlck->flag & tlckWRITEPAGE) { tlck->flag &= ~tlckWRITEPAGE; /* do not release page to freelist */ force_metapage(mp); #if 0 /* * The "right" thing to do here is to * synchronously write the metadata. * With the current implementation this * is hard since write_metapage requires * us to kunmap & remap the page. If we * have tlocks pointing into the metadata * pages, we don't want to do this. I think * we can get by with synchronously writing * the pages when they are released. */ assert(mp->nohomeok); set_bit(META_dirty, &mp->flag); set_bit(META_sync, &mp->flag); #endif } } } } /* * txUpdateMap() * * function: update persistent allocation map (and working map * if appropriate); * * parameter: */ static void txUpdateMap(struct tblock * tblk) { struct inode *ip; struct inode *ipimap; lid_t lid; struct tlock *tlck; struct maplock *maplock; struct pxd_lock pxdlock; int maptype; int k, nlock; struct metapage *mp = NULL; ipimap = JFS_SBI(tblk->sb)->ipimap; maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; /* * update block allocation map * * update allocation state in pmap (and wmap) and * update lsn of the pmap page; */ /* * scan each tlock/page of transaction for block allocation/free: * * for each tlock/page of transaction, update map. * ? are there tlock for pmap and pwmap at the same time ? */ for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); if ((tlck->flag & tlckUPDATEMAP) == 0) continue; if (tlck->flag & tlckFREEPAGE) { /* * Another thread may attempt to reuse freed space * immediately, so we want to get rid of the metapage * before anyone else has a chance to get it. * Lock metapage, update maps, then invalidate * the metapage. */ mp = tlck->mp; ASSERT(mp->xflag & COMMIT_PAGE); grab_metapage(mp); } /* * extent list: * . in-line PXD list: * . out-of-line XAD list: */ maplock = (struct maplock *) & tlck->lock; nlock = maplock->index; for (k = 0; k < nlock; k++, maplock++) { /* * allocate blocks in persistent map: * * blocks have been allocated from wmap at alloc time; */ if (maplock->flag & mlckALLOC) { txAllocPMap(ipimap, maplock, tblk); } /* * free blocks in persistent and working map: * blocks will be freed in pmap and then in wmap; * * ? 
tblock specifies the PMAP/PWMAP based upon * transaction * * free blocks in persistent map: * blocks will be freed from wmap at last reference * release of the object for regular files; * * Alway free blocks from both persistent & working * maps for directories */ else { /* (maplock->flag & mlckFREE) */ if (tlck->flag & tlckDIRECTORY) txFreeMap(ipimap, maplock, tblk, COMMIT_PWMAP); else txFreeMap(ipimap, maplock, tblk, maptype); } } if (tlck->flag & tlckFREEPAGE) { if (!(tblk->flag & tblkGC_LAZY)) { /* This is equivalent to txRelease */ ASSERT(mp->lid == lid); tlck->mp->lid = 0; } assert(mp->nohomeok == 1); metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; } } /* * update inode allocation map * * update allocation state in pmap and * update lsn of the pmap page; * update in-memory inode flag/state * * unlock mapper/write lock */ if (tblk->xflag & COMMIT_CREATE) { diUpdatePMap(ipimap, tblk->ino, false, tblk); /* update persistent block allocation map * for the allocation of inode extent; */ pxdlock.flag = mlckALLOCPXD; pxdlock.pxd = tblk->u.ixpxd; pxdlock.index = 1; txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); } else if (tblk->xflag & COMMIT_DELETE) { ip = tblk->u.ip; diUpdatePMap(ipimap, ip->i_ino, true, tblk); iput(ip); } } /* * txAllocPMap() * * function: allocate from persistent map; * * parameter: * ipbmap - * malock - * xad list: * pxd: * * maptype - * allocate from persistent map; * free from persistent map; * (e.g., tmp file - free from working map at releae * of last reference); * free from persistent and working map; * * lsn - log sequence number; */ static void txAllocPMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk) { struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct xdlistlock *xadlistlock; xad_t *xad; s64 xaddr; int xlen; struct pxd_lock *pxdlock; struct xdlistlock *pxdlistlock; pxd_t *pxd; int n; /* * allocate from persistent map; */ if (maplock->flag & mlckALLOCXADLIST) { xadlistlock = (struct xdlistlock *) maplock; xad = xadlistlock->xdlist; for (n = 0; n < xadlistlock->count; n++, xad++) { if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); xad->flag &= ~(XAD_NEW | XAD_EXTENDED); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } else if (maplock->flag & mlckALLOCPXD) { pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckALLOCPXDLIST) */ pxdlistlock = (struct xdlistlock *) maplock; pxd = pxdlistlock->xdlist; for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } /* * txFreeMap() * * function: free from persistent and/or working map; * * todo: optimization */ void txFreeMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk, int maptype) { struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct xdlistlock *xadlistlock; xad_t *xad; s64 xaddr; int xlen; struct pxd_lock *pxdlock; struct xdlistlock *pxdlistlock; pxd_t *pxd; int n; jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", tblk, maplock, maptype); /* * free from persistent map; */ if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { if (maplock->flag & 
mlckFREEXADLIST) { xadlistlock = (struct xdlistlock *) maplock; xad = xadlistlock->xdlist; for (n = 0; n < xadlistlock->count; n++, xad++) { if (!(xad->flag & XAD_NEW)) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } else if (maplock->flag & mlckFREEPXD) { pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckFREEPXDLIST) */ pxdlistlock = (struct xdlistlock *) maplock; pxd = pxdlistlock->xdlist; for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } /* * free from working map; */ if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { if (maplock->flag & mlckFREEXADLIST) { xadlistlock = (struct xdlistlock *) maplock; xad = xadlistlock->xdlist; for (n = 0; n < xadlistlock->count; n++, xad++) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); dbFree(ip, xaddr, (s64) xlen); xad->flag = 0; jfs_info("freeWMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } else if (maplock->flag & mlckFREEPXD) { pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); dbFree(ip, xaddr, (s64) xlen); jfs_info("freeWMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckFREEPXDLIST) */ pxdlistlock = (struct xdlistlock *) maplock; pxd = pxdlistlock->xdlist; for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); dbFree(ip, xaddr, (s64) xlen); jfs_info("freeWMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } } /* * txFreelock() * * function: remove tlock from inode anonymous locklist */ void txFreelock(struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct tlock *xtlck, *tlck; lid_t xlid = 0, lid; if (!jfs_ip->atlhead) return; TXN_LOCK(); xtlck = (struct tlock *) &jfs_ip->atlhead; while ((lid = xtlck->next) != 0) { tlck = lid_to_tlock(lid); if (tlck->flag & tlckFREELOCK) { xtlck->next = tlck->next; txLockFree(lid); } else { xtlck = tlck; xlid = lid; } } if (jfs_ip->atlhead) jfs_ip->atltail = xlid; else { jfs_ip->atltail = 0; /* * If inode was on anon_list, remove it */ list_del_init(&jfs_ip->anon_inode_list); } TXN_UNLOCK(); } /* * txAbort() * * function: abort tx before commit; * * frees line-locks and segment locks for all * segments in comdata structure. * Optionally sets state of file-system to FM_DIRTY in super-block. * The log age of in-memory page frames held by the caller is * reset to 0 (to avoid log wrap). */ void txAbort(tid_t tid, int dirty) { lid_t lid, next; struct metapage *mp; struct tblock *tblk = tid_to_tblock(tid); struct tlock *tlck; /* * free tlocks of the transaction */ for (lid = tblk->next; lid; lid = next) { tlck = lid_to_tlock(lid); next = tlck->next; mp = tlck->mp; JFS_IP(tlck->ip)->xtlid = 0; if (mp) { mp->lid = 0; /* * reset lsn of page to avoid log wrap: * * (page may have been previously committed by another * transaction(s) but has not been paged, i.e., * it may be on logsync list even though it has not * been logged for the current tx.)
*/ if (mp->xflag & COMMIT_PAGE && mp->lsn) LogSyncRelease(mp); } /* insert tlock at head of freelist */ TXN_LOCK(); txLockFree(lid); TXN_UNLOCK(); } /* caller will free the transaction block */ tblk->next = tblk->last = 0; /* * mark filesystem dirty */ if (dirty) jfs_error(tblk->sb, "\n"); return; } /* * txLazyCommit(void) * * All transactions except those changing ipimap (COMMIT_FORCE) are * processed by this routine. This insures that the inode and block * allocation maps are updated in order. For synchronous transactions, * let the user thread finish processing after txUpdateMap() is called. */ static void txLazyCommit(struct tblock * tblk) { struct jfs_log *log; while (((tblk->flag & tblkGC_READY) == 0) && ((tblk->flag & tblkGC_UNLOCKED) == 0)) { /* We must have gotten ahead of the user thread */ jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); yield(); } jfs_info("txLazyCommit: processing tblk 0x%p", tblk); txUpdateMap(tblk); log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; spin_lock_irq(&log->gclock); // LOGGC_LOCK tblk->flag |= tblkGC_COMMITTED; if (tblk->flag & tblkGC_READY) log->gcrtc--; wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP /* * Can't release log->gclock until we've tested tblk->flag */ if (tblk->flag & tblkGC_LAZY) { spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK txUnlock(tblk); tblk->flag &= ~tblkGC_LAZY; txEnd(tblk - TxBlock); /* Convert back to tid */ } else spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); } /* * jfs_lazycommit(void) * * To be run as a kernel daemon. If lbmIODone is called in an interrupt * context, or where blocking is not wanted, this routine will process * committed transactions from the unlock queue. */ int jfs_lazycommit(void *arg) { int WorkDone; struct tblock *tblk; unsigned long flags; struct jfs_sb_info *sbi; set_freezable(); do { LAZY_LOCK(flags); jfs_commit_thread_waking = 0; /* OK to wake another thread */ while (!list_empty(&TxAnchor.unlock_queue)) { WorkDone = 0; list_for_each_entry(tblk, &TxAnchor.unlock_queue, cqueue) { sbi = JFS_SBI(tblk->sb); /* * For each volume, the transactions must be * handled in order. If another commit thread * is handling a tblk for this superblock, * skip it */ if (sbi->commit_state & IN_LAZYCOMMIT) continue; sbi->commit_state |= IN_LAZYCOMMIT; WorkDone = 1; /* * Remove transaction from queue */ list_del(&tblk->cqueue); LAZY_UNLOCK(flags); txLazyCommit(tblk); LAZY_LOCK(flags); sbi->commit_state &= ~IN_LAZYCOMMIT; /* * Don't continue in the for loop. (We can't * anyway, it's unsafe!) We want to go back to * the beginning of the list. */ break; } /* If there was nothing to do, don't continue */ if (!WorkDone) break; } /* In case a wakeup came while all threads were active */ jfs_commit_thread_waking = 0; if (freezing(current)) { LAZY_UNLOCK(flags); try_to_freeze(); } else { DECLARE_WAITQUEUE(wq, current); add_wait_queue(&jfs_commit_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); LAZY_UNLOCK(flags); schedule(); remove_wait_queue(&jfs_commit_thread_wait, &wq); } } while (!kthread_should_stop()); if (!list_empty(&TxAnchor.unlock_queue)) jfs_err("jfs_lazycommit being killed w/pending transactions!"); else jfs_info("jfs_lazycommit being killed"); return 0; } void txLazyUnlock(struct tblock * tblk) { unsigned long flags; LAZY_LOCK(flags); list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); /* * Don't wake up a commit thread if there is already one servicing * this superblock, or if the last one we woke up hasn't started yet. 
*/ if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && !jfs_commit_thread_waking) { jfs_commit_thread_waking = 1; wake_up(&jfs_commit_thread_wait); } LAZY_UNLOCK(flags); } static void LogSyncRelease(struct metapage * mp) { struct jfs_log *log = mp->log; assert(mp->nohomeok); assert(log); metapage_homeok(mp); } /* * txQuiesce * * Block all new transactions and push anonymous transactions to * completion * * This does almost the same thing as jfs_sync below. We don't * worry about deadlocking when jfs_tlocks_low is set, since we would * expect jfs_sync to get us out of that jam. */ void txQuiesce(struct super_block *sb) { struct inode *ip; struct jfs_inode_info *jfs_ip; struct jfs_log *log = JFS_SBI(sb)->log; tid_t tid; set_bit(log_QUIESCE, &log->flag); TXN_LOCK(); restart: while (!list_empty(&TxAnchor.anon_list)) { jfs_ip = list_entry(TxAnchor.anon_list.next, struct jfs_inode_info, anon_inode_list); ip = &jfs_ip->vfs_inode; /* * inode will be removed from anonymous list * when it is committed */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); mutex_lock(&jfs_ip->commit_mutex); txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&jfs_ip->commit_mutex); /* * Just to be safe. I don't know how * long we can run without blocking */ cond_resched(); TXN_LOCK(); } /* * If jfs_sync is running in parallel, there could be some inodes * on anon_list2. Let's check. */ if (!list_empty(&TxAnchor.anon_list2)) { list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); goto restart; } TXN_UNLOCK(); /* * We may need to kick off the group commit */ jfs_flush_journal(log, 0); } /* * txResume() * * Allows transactions to start again following txQuiesce */ void txResume(struct super_block *sb) { struct jfs_log *log = JFS_SBI(sb)->log; clear_bit(log_QUIESCE, &log->flag); TXN_WAKEUP(&log->syncwait); } /* * jfs_sync(void) * * To be run as a kernel daemon. This is awakened when tlocks run low. * We write any inodes that have anonymous tlocks so they will become * available. */ int jfs_sync(void *arg) { struct inode *ip; struct jfs_inode_info *jfs_ip; tid_t tid; set_freezable(); do { /* * write each inode on the anonymous inode list */ TXN_LOCK(); while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { jfs_ip = list_entry(TxAnchor.anon_list.next, struct jfs_inode_info, anon_inode_list); ip = &jfs_ip->vfs_inode; if (! igrab(ip)) { /* * Inode is being freed */ list_del_init(&jfs_ip->anon_inode_list); } else if (mutex_trylock(&jfs_ip->commit_mutex)) { /* * inode will be removed from anonymous list * when it is committed */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE); txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&jfs_ip->commit_mutex); iput(ip); /* * Just to be safe. I don't know how * long we can run without blocking */ cond_resched(); TXN_LOCK(); } else { /* We can't get the commit mutex. It may * be held by a thread waiting for tlock's * so let's not block here. Save it to * put back on the anon_list. 
*/ /* Move from anon_list to anon_list2 */ list_move(&jfs_ip->anon_inode_list, &TxAnchor.anon_list2); TXN_UNLOCK(); iput(ip); TXN_LOCK(); } } /* Add anon_list2 back to anon_list */ list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); if (freezing(current)) { TXN_UNLOCK(); try_to_freeze(); } else { set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); schedule(); } } while (!kthread_should_stop()); jfs_info("jfs_sync being killed"); return 0; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) int jfs_txanchor_proc_show(struct seq_file *m, void *v) { char *freewait; char *freelockwait; char *lowlockwait; freewait = waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; freelockwait = waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; lowlockwait = waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; seq_printf(m, "JFS TxAnchor\n" "============\n" "freetid = %d\n" "freewait = %s\n" "freelock = %d\n" "freelockwait = %s\n" "lowlockwait = %s\n" "tlocksInUse = %d\n" "jfs_tlocks_low = %d\n" "unlock_queue is %sempty\n", TxAnchor.freetid, freewait, TxAnchor.freelock, freelockwait, lowlockwait, TxAnchor.tlocksInUse, jfs_tlocks_low, list_empty(&TxAnchor.unlock_queue) ? "" : "not "); return 0; } #endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) int jfs_txstats_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS TxStats\n" "===========\n" "calls to txBegin = %d\n" "txBegin blocked by sync barrier = %d\n" "txBegin blocked by tlocks low = %d\n" "txBegin blocked by no free tid = %d\n" "calls to txBeginAnon = %d\n" "txBeginAnon blocked by sync barrier = %d\n" "txBeginAnon blocked by tlocks low = %d\n" "calls to txLockAlloc = %d\n" "tLockAlloc blocked by no free lock = %d\n", TxStat.txBegin, TxStat.txBegin_barrier, TxStat.txBegin_lockslow, TxStat.txBegin_freetid, TxStat.txBeginAnon, TxStat.txBeginAnon_barrier, TxStat.txBeginAnon_lockslow, TxStat.txLockAlloc, TxStat.txLockAlloc_freelock); return 0; } #endif
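Taken together, the routines above form a fixed lifecycle: txBegin() reserves a transaction id and its tblock, tlocks accumulate on the tblock as metadata pages are modified, txCommit() logs and releases them, and txEnd() retires the tid. Below is a minimal caller-side sketch of that sequence, modeled on the commit loops in txQuiesce() and jfs_sync() above; the function name and the single-inode scenario are illustrative, not part of this file.

/*
 * Illustrative sketch only: commit one inode's pending (anonymous)
 * tlocks using the txBegin/txCommit/txEnd pattern from txQuiesce().
 */
static int example_commit_one_inode(struct inode *ip)
{
	tid_t tid;
	int rc;

	tid = txBegin(ip->i_sb, COMMIT_INODE);	/* reserve tid/tblock */
	mutex_lock(&JFS_IP(ip)->commit_mutex);
	rc = txCommit(tid, 1, &ip, 0);	/* write log records, release tlocks */
	txEnd(tid);			/* retire the transaction id */
	mutex_unlock(&JFS_IP(ip)->commit_mutex);
	return rc;
}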
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/domain.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/rculist.h> /* Variable definitions. */ /* The initial domain. */ struct tomoyo_domain_info tomoyo_kernel_domain; /** * tomoyo_update_policy - Update an entry for exception policy. * * @new_entry: Pointer to "struct tomoyo_acl_head". * @size: Size of @new_entry in bytes. * @param: Pointer to "struct tomoyo_acl_param". * @check_duplicate: Callback function to find duplicated entry. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate)(const struct tomoyo_acl_head *, const struct tomoyo_acl_head *)) { int error = param->is_delete ? -ENOENT : -ENOMEM; struct tomoyo_acl_head *entry; struct list_head *list = param->list; if (mutex_lock_interruptible(&tomoyo_policy_lock)) return -ENOMEM; list_for_each_entry_rcu(entry, list, list, srcu_read_lock_held(&tomoyo_ss)) { if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS) continue; if (!check_duplicate(entry, new_entry)) continue; entry->is_deleted = param->is_delete; error = 0; break; } if (error && !param->is_delete) { entry = tomoyo_commit_ok(new_entry, size); if (entry) { list_add_tail_rcu(&entry->list, list); error = 0; } } mutex_unlock(&tomoyo_policy_lock); return error; } /** * tomoyo_same_acl_head - Check for duplicated "struct tomoyo_acl_info" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { return a->type == b->type && a->cond == b->cond; } /** * tomoyo_update_domain - Update an entry for domain policy. * * @new_entry: Pointer to "struct tomoyo_acl_info". * @size: Size of @new_entry in bytes. * @param: Pointer to "struct tomoyo_acl_param". * @check_duplicate: Callback function to find duplicated entry. * @merge_duplicate: Callback function to merge duplicated entry. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate)(const struct tomoyo_acl_info *, const struct tomoyo_acl_info *), bool (*merge_duplicate)(struct tomoyo_acl_info *, struct tomoyo_acl_info *, const bool)) { const bool is_delete = param->is_delete; int error = is_delete ? -ENOENT : -ENOMEM; struct tomoyo_acl_info *entry; struct list_head * const list = param->list; if (param->data[0]) { new_entry->cond = tomoyo_get_condition(param); if (!new_entry->cond) return -EINVAL; /* * Domain transition preference is allowed for only * "file execute" entries.
*/ if (new_entry->cond->transit && !(new_entry->type == TOMOYO_TYPE_PATH_ACL && container_of(new_entry, struct tomoyo_path_acl, head) ->perm == 1 << TOMOYO_TYPE_EXECUTE)) goto out; } if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; list_for_each_entry_rcu(entry, list, list, srcu_read_lock_held(&tomoyo_ss)) { if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS) continue; if (!tomoyo_same_acl_head(entry, new_entry) || !check_duplicate(entry, new_entry)) continue; if (merge_duplicate) entry->is_deleted = merge_duplicate(entry, new_entry, is_delete); else entry->is_deleted = is_delete; error = 0; break; } if (error && !is_delete) { entry = tomoyo_commit_ok(new_entry, size); if (entry) { list_add_tail_rcu(&entry->list, list); error = 0; } } mutex_unlock(&tomoyo_policy_lock); out: tomoyo_put_condition(new_entry->cond); return error; } /** * tomoyo_check_acl - Do permission check. * * @r: Pointer to "struct tomoyo_request_info". * @check_entry: Callback function to check type specific parameters. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ void tomoyo_check_acl(struct tomoyo_request_info *r, bool (*check_entry)(struct tomoyo_request_info *, const struct tomoyo_acl_info *)) { const struct tomoyo_domain_info *domain = r->domain; struct tomoyo_acl_info *ptr; const struct list_head *list = &domain->acl_info_list; u16 i = 0; retry: list_for_each_entry_rcu(ptr, list, list, srcu_read_lock_held(&tomoyo_ss)) { if (ptr->is_deleted || ptr->type != r->param_type) continue; if (!check_entry(r, ptr)) continue; if (!tomoyo_condition(r, ptr->cond)) continue; r->matched_acl = ptr; r->granted = true; return; } for (; i < TOMOYO_MAX_ACL_GROUPS; i++) { if (!test_bit(i, domain->group)) continue; list = &domain->ns->acl_group[i++]; goto retry; } r->granted = false; } /* The list for "struct tomoyo_domain_info". */ LIST_HEAD(tomoyo_domain_list); /** * tomoyo_last_word - Get last component of a domainname. * * @name: Domainname to check. * * Returns the last word of @domainname. */ static const char *tomoyo_last_word(const char *name) { const char *cp = strrchr(name, ' '); if (cp) return cp + 1; return name; } /** * tomoyo_same_transition_control - Check for duplicated "struct tomoyo_transition_control" entry. * * @a: Pointer to "struct tomoyo_acl_head". * @b: Pointer to "struct tomoyo_acl_head". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { const struct tomoyo_transition_control *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_transition_control *p2 = container_of(b, typeof(*p2), head); return p1->type == p2->type && p1->is_last_name == p2->is_last_name && p1->domainname == p2->domainname && p1->program == p2->program; } /** * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list. * * @param: Pointer to "struct tomoyo_acl_param". * @type: Type of this entry. * * Returns 0 on success, negative value otherwise. */ int tomoyo_write_transition_control(struct tomoyo_acl_param *param, const u8 type) { struct tomoyo_transition_control e = { .type = type }; int error = param->is_delete ? 
	int error = param->is_delete ? -ENOENT : -ENOMEM;
	char *program = param->data;
	char *domainname = strstr(program, " from ");

	if (domainname) {
		*domainname = '\0';
		domainname += 6;
	} else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
		   type == TOMOYO_TRANSITION_CONTROL_KEEP) {
		domainname = program;
		program = NULL;
	}
	if (program && strcmp(program, "any")) {
		if (!tomoyo_correct_path(program))
			return -EINVAL;
		e.program = tomoyo_get_name(program);
		if (!e.program)
			goto out;
	}
	if (domainname && strcmp(domainname, "any")) {
		if (!tomoyo_correct_domain(domainname)) {
			if (!tomoyo_correct_path(domainname))
				goto out;
			e.is_last_name = true;
		}
		e.domainname = tomoyo_get_name(domainname);
		if (!e.domainname)
			goto out;
	}
	param->list = &param->ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
	error = tomoyo_update_policy(&e.head, sizeof(e), param,
				     tomoyo_same_transition_control);
out:
	tomoyo_put_name(e.domainname);
	tomoyo_put_name(e.program);
	return error;
}

/**
 * tomoyo_scan_transition - Try to find specific domain transition type.
 *
 * @list:       Pointer to "struct list_head".
 * @domainname: The name of current domain.
 * @program:    The name of requested program.
 * @last_name:  The last component of @domainname.
 * @type:       One of values in "enum tomoyo_transition_type".
 *
 * Returns true if found one, false otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
static inline bool tomoyo_scan_transition
(const struct list_head *list, const struct tomoyo_path_info *domainname,
 const struct tomoyo_path_info *program, const char *last_name,
 const enum tomoyo_transition_type type)
{
	const struct tomoyo_transition_control *ptr;

	list_for_each_entry_rcu(ptr, list, head.list,
				srcu_read_lock_held(&tomoyo_ss)) {
		if (ptr->head.is_deleted || ptr->type != type)
			continue;
		if (ptr->domainname) {
			if (!ptr->is_last_name) {
				if (ptr->domainname != domainname)
					continue;
			} else {
				/*
				 * Use direct strcmp() since this is
				 * unlikely used.
				 */
				if (strcmp(ptr->domainname->name, last_name))
					continue;
			}
		}
		if (ptr->program && tomoyo_pathcmp(ptr->program, program))
			continue;
		return true;
	}
	return false;
}

/**
 * tomoyo_transition_type - Get domain transition type.
 *
 * @ns:         Pointer to "struct tomoyo_policy_namespace".
 * @domainname: The name of current domain.
 * @program:    The name of requested program.
 *
 * Returns TOMOYO_TRANSITION_CONTROL_TRANSIT if executing @program causes
 * domain transition across namespaces, TOMOYO_TRANSITION_CONTROL_INITIALIZE if
 * executing @program reinitializes domain transition within that namespace,
 * TOMOYO_TRANSITION_CONTROL_KEEP if executing @program stays at @domainname,
 * others otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
static enum tomoyo_transition_type tomoyo_transition_type
(const struct tomoyo_policy_namespace *ns,
 const struct tomoyo_path_info *domainname,
 const struct tomoyo_path_info *program)
{
	const char *last_name = tomoyo_last_word(domainname->name);
	enum tomoyo_transition_type type = TOMOYO_TRANSITION_CONTROL_NO_RESET;

	while (type < TOMOYO_MAX_TRANSITION_TYPE) {
		const struct list_head * const list =
			&ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];

		if (!tomoyo_scan_transition(list, domainname, program,
					    last_name, type)) {
			type++;
			continue;
		}
		if (type != TOMOYO_TRANSITION_CONTROL_NO_RESET &&
		    type != TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE)
			break;
		/*
		 * Do not check for reset_domain if no_reset_domain matched.
		 * Do not check for initialize_domain if no_initialize_domain
		 * matched.
		 */
		type++;
		type++;
	}
	return type;
}

/**
 * tomoyo_same_aggregator - Check for duplicated "struct tomoyo_aggregator" entry.
 *
 * @a: Pointer to "struct tomoyo_acl_head".
 * @b: Pointer to "struct tomoyo_acl_head".
 *
 * Returns true if @a == @b, false otherwise.
 */
static bool tomoyo_same_aggregator(const struct tomoyo_acl_head *a,
				   const struct tomoyo_acl_head *b)
{
	const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1),
							  head);
	const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2),
							  head);

	return p1->original_name == p2->original_name &&
		p1->aggregated_name == p2->aggregated_name;
}

/**
 * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
 *
 * @param: Pointer to "struct tomoyo_acl_param".
 *
 * Returns 0 on success, negative value otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
int tomoyo_write_aggregator(struct tomoyo_acl_param *param)
{
	struct tomoyo_aggregator e = { };
	int error = param->is_delete ? -ENOENT : -ENOMEM;
	const char *original_name = tomoyo_read_token(param);
	const char *aggregated_name = tomoyo_read_token(param);

	if (!tomoyo_correct_word(original_name) ||
	    !tomoyo_correct_path(aggregated_name))
		return -EINVAL;
	e.original_name = tomoyo_get_name(original_name);
	e.aggregated_name = tomoyo_get_name(aggregated_name);
	if (!e.original_name || !e.aggregated_name ||
	    e.aggregated_name->is_patterned) /* No patterns allowed. */
		goto out;
	param->list = &param->ns->policy_list[TOMOYO_ID_AGGREGATOR];
	error = tomoyo_update_policy(&e.head, sizeof(e), param,
				     tomoyo_same_aggregator);
out:
	tomoyo_put_name(e.original_name);
	tomoyo_put_name(e.aggregated_name);
	return error;
}

/**
 * tomoyo_find_namespace - Find specified namespace.
 *
 * @name: Name of namespace to find.
 * @len:  Length of @name.
 *
 * Returns pointer to "struct tomoyo_policy_namespace" if found,
 * NULL otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
static struct tomoyo_policy_namespace *tomoyo_find_namespace
(const char *name, const unsigned int len)
{
	struct tomoyo_policy_namespace *ns;

	list_for_each_entry(ns, &tomoyo_namespace_list, namespace_list) {
		if (strncmp(name, ns->name, len) ||
		    (name[len] && name[len] != ' '))
			continue;
		return ns;
	}
	return NULL;
}

/**
 * tomoyo_assign_namespace - Create a new namespace.
 *
 * @domainname: Name of namespace to create.
 *
 * Returns pointer to "struct tomoyo_policy_namespace" on success,
 * NULL otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
struct tomoyo_policy_namespace *tomoyo_assign_namespace(const char *domainname)
{
	struct tomoyo_policy_namespace *ptr;
	struct tomoyo_policy_namespace *entry;
	const char *cp = domainname;
	unsigned int len = 0;

	while (*cp && *cp++ != ' ')
		len++;
	ptr = tomoyo_find_namespace(domainname, len);
	if (ptr)
		return ptr;
	if (len >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_domain_def(domainname))
		return NULL;
	entry = kzalloc(sizeof(*entry) + len + 1, GFP_NOFS | __GFP_NOWARN);
	if (mutex_lock_interruptible(&tomoyo_policy_lock))
		goto out;
	ptr = tomoyo_find_namespace(domainname, len);
	if (!ptr && tomoyo_memory_ok(entry)) {
		char *name = (char *) (entry + 1);

		ptr = entry;
		memmove(name, domainname, len);
		name[len] = '\0';
		entry->name = name;
		tomoyo_init_policy_namespace(entry);
		entry = NULL;
	}
	mutex_unlock(&tomoyo_policy_lock);
out:
	kfree(entry);
	return ptr;
}

/**
 * tomoyo_namespace_jump - Check for namespace jump.
 *
 * @domainname: Name of domain.
 *
 * Returns true if namespace differs, false otherwise.
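 *
 * A domainname is treated as belonging to the current namespace when it
 * starts with the namespace's name followed by end-of-string or a space.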
 */
static bool tomoyo_namespace_jump(const char *domainname)
{
	const char *namespace = tomoyo_current_namespace()->name;
	const int len = strlen(namespace);

	return strncmp(domainname, namespace, len) ||
		(domainname[len] && domainname[len] != ' ');
}

/**
 * tomoyo_assign_domain - Create a domain or a namespace.
 *
 * @domainname: The name of domain.
 * @transit:    True if transit to domain found or created.
 *
 * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
						const bool transit)
{
	struct tomoyo_domain_info e = { };
	struct tomoyo_domain_info *entry = tomoyo_find_domain(domainname);
	bool created = false;

	if (entry) {
		if (transit) {
			/*
			 * Since namespaces are created at runtime, profiles
			 * may not be created by the time the process transits
			 * to that domain. Do not perform domain transition if
			 * profile for that domain is not yet created.
			 */
			if (tomoyo_policy_loaded &&
			    !entry->ns->profile_ptr[entry->profile])
				return NULL;
		}
		return entry;
	}
	/* Requested domain does not exist. */
	/* Don't create requested domain if domainname is invalid. */
	if (strlen(domainname) >= TOMOYO_EXEC_TMPSIZE - 10 ||
	    !tomoyo_correct_domain(domainname))
		return NULL;
	/*
	 * Since definition of profiles and acl_groups may differ across
	 * namespaces, do not inherit "use_profile" and "use_group" settings
	 * by automatically creating requested domain upon domain transition.
	 */
	if (transit && tomoyo_namespace_jump(domainname))
		return NULL;
	e.ns = tomoyo_assign_namespace(domainname);
	if (!e.ns)
		return NULL;
	/*
	 * "use_profile" and "use_group" settings for automatically created
	 * domains are inherited from current domain. These are 0 for manually
	 * created domains.
	 */
	if (transit) {
		const struct tomoyo_domain_info *domain = tomoyo_domain();

		e.profile = domain->profile;
		memcpy(e.group, domain->group, sizeof(e.group));
	}
	e.domainname = tomoyo_get_name(domainname);
	if (!e.domainname)
		return NULL;
	if (mutex_lock_interruptible(&tomoyo_policy_lock))
		goto out;
	entry = tomoyo_find_domain(domainname);
	if (!entry) {
		entry = tomoyo_commit_ok(&e, sizeof(e));
		if (entry) {
			INIT_LIST_HEAD(&entry->acl_info_list);
			list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
			created = true;
		}
	}
	mutex_unlock(&tomoyo_policy_lock);
out:
	tomoyo_put_name(e.domainname);
	if (entry && transit) {
		if (created) {
			struct tomoyo_request_info r;
			int i;

			tomoyo_init_request_info(&r, entry,
						 TOMOYO_MAC_FILE_EXECUTE);
			r.granted = false;
			tomoyo_write_log(&r, "use_profile %u\n",
					 entry->profile);
			for (i = 0; i < TOMOYO_MAX_ACL_GROUPS; i++)
				if (test_bit(i, entry->group))
					tomoyo_write_log(&r, "use_group %u\n",
							 i);
			tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
		}
	}
	return entry;
}

/**
 * tomoyo_environ - Check permission for environment variable names.
 *
 * @ee: Pointer to "struct tomoyo_execve".
 *
 * Returns 0 on success, negative value otherwise.
 */
static int tomoyo_environ(struct tomoyo_execve *ee)
{
	struct tomoyo_request_info *r = &ee->r;
	struct linux_binprm *bprm = ee->bprm;
	/*
	 * env_page.data is allocated by tomoyo_dump_page().
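	 * It is released by the kfree() at the bottom of this function.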
	 */
	struct tomoyo_page_dump env_page = { };
	char *arg_ptr; /* Size is TOMOYO_EXEC_TMPSIZE bytes */
	int arg_len = 0;
	unsigned long pos = bprm->p;
	int offset = pos % PAGE_SIZE;
	int argv_count = bprm->argc;
	int envp_count = bprm->envc;
	int error = -ENOMEM;

	ee->r.type = TOMOYO_MAC_ENVIRON;
	ee->r.profile = r->domain->profile;
	ee->r.mode = tomoyo_get_mode(r->domain->ns, ee->r.profile,
				     TOMOYO_MAC_ENVIRON);
	if (!r->mode || !envp_count)
		return 0;
	arg_ptr = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
	if (!arg_ptr)
		goto out;
	while (error == -ENOMEM) {
		if (!tomoyo_dump_page(bprm, pos, &env_page))
			goto out;
		pos += PAGE_SIZE - offset;
		/* Read. */
		while (argv_count && offset < PAGE_SIZE) {
			if (!env_page.data[offset++])
				argv_count--;
		}
		if (argv_count) {
			offset = 0;
			continue;
		}
		while (offset < PAGE_SIZE) {
			const unsigned char c = env_page.data[offset++];

			if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) {
				if (c == '=') {
					arg_ptr[arg_len++] = '\0';
				} else if (c == '\\') {
					arg_ptr[arg_len++] = '\\';
					arg_ptr[arg_len++] = '\\';
				} else if (c > ' ' && c < 127) {
					arg_ptr[arg_len++] = c;
				} else {
					arg_ptr[arg_len++] = '\\';
					arg_ptr[arg_len++] = (c >> 6) + '0';
					arg_ptr[arg_len++] =
						((c >> 3) & 7) + '0';
					arg_ptr[arg_len++] = (c & 7) + '0';
				}
			} else {
				arg_ptr[arg_len] = '\0';
			}
			if (c)
				continue;
			if (tomoyo_env_perm(r, arg_ptr)) {
				error = -EPERM;
				break;
			}
			if (!--envp_count) {
				error = 0;
				break;
			}
			arg_len = 0;
		}
		offset = 0;
	}
out:
	if (r->mode != TOMOYO_CONFIG_ENFORCING)
		error = 0;
	kfree(env_page.data);
	kfree(arg_ptr);
	return error;
}

/**
 * tomoyo_find_next_domain - Find a domain.
 *
 * @bprm: Pointer to "struct linux_binprm".
 *
 * Returns 0 on success, negative value otherwise.
 *
 * Caller holds tomoyo_read_lock().
 */
int tomoyo_find_next_domain(struct linux_binprm *bprm)
{
	struct tomoyo_domain_info *old_domain = tomoyo_domain();
	struct tomoyo_domain_info *domain = NULL;
	const char *original_name = bprm->filename;
	int retval = -ENOMEM;
	bool reject_on_transition_failure = false;
	const struct tomoyo_path_info *candidate;
	struct tomoyo_path_info exename;
	struct tomoyo_execve *ee = kzalloc(sizeof(*ee), GFP_NOFS);

	if (!ee)
		return -ENOMEM;
	ee->tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
	if (!ee->tmp) {
		kfree(ee);
		return -ENOMEM;
	}
	/* ee->dump->data is allocated by tomoyo_dump_page(). */
	tomoyo_init_request_info(&ee->r, NULL, TOMOYO_MAC_FILE_EXECUTE);
	ee->r.ee = ee;
	ee->bprm = bprm;
	ee->r.obj = &ee->obj;
	ee->obj.path1 = bprm->file->f_path;
	/*
	 * Get symlink's pathname of program, but fall back to realpath if
	 * symlink's pathname does not exist or symlink's pathname refers
	 * to proc filesystem (e.g. /dev/fd/<num> or /proc/self/fd/<num>).
	 */
	exename.name = tomoyo_realpath_nofollow(original_name);
	if (exename.name && !strncmp(exename.name, "proc:/", 6)) {
		kfree(exename.name);
		exename.name = NULL;
	}
	if (!exename.name) {
		exename.name = tomoyo_realpath_from_path(&bprm->file->f_path);
		if (!exename.name)
			goto out;
	}
	tomoyo_fill_path_info(&exename);
retry:
	/* Check 'aggregator' directive. */
	{
		struct tomoyo_aggregator *ptr;
		struct list_head *list =
			&old_domain->ns->policy_list[TOMOYO_ID_AGGREGATOR];

		candidate = &exename;
		list_for_each_entry_rcu(ptr, list, head.list,
					srcu_read_lock_held(&tomoyo_ss)) {
			if (ptr->head.is_deleted ||
			    !tomoyo_path_matches_pattern(&exename,
							 ptr->original_name))
				continue;
			candidate = ptr->aggregated_name;
			break;
		}
	}
	/*
	 * Check execute permission.
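	 * tomoyo_execute_permission() may return TOMOYO_RETRY_REQUEST, in
	 * which case the 'aggregator' lookup above is redone before checking
	 * again.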
	 */
	retval = tomoyo_execute_permission(&ee->r, candidate);
	if (retval == TOMOYO_RETRY_REQUEST)
		goto retry;
	if (retval < 0)
		goto out;
	/*
	 * To be able to specify domainnames with wildcards, use the
	 * pathname specified in the policy (which may contain
	 * wildcard) rather than the pathname passed to execve()
	 * (which never contains wildcard).
	 */
	if (ee->r.param.path.matched_path)
		candidate = ee->r.param.path.matched_path;
	/*
	 * Check for domain transition preference if "file execute" matched.
	 * If preference is given, make execve() fail if domain transition
	 * has failed, for domain transition preference should be used with
	 * destination domain defined.
	 */
	if (ee->transition) {
		const char *domainname = ee->transition->name;

		reject_on_transition_failure = true;
		if (!strcmp(domainname, "keep"))
			goto force_keep_domain;
		if (!strcmp(domainname, "child"))
			goto force_child_domain;
		if (!strcmp(domainname, "reset"))
			goto force_reset_domain;
		if (!strcmp(domainname, "initialize"))
			goto force_initialize_domain;
		if (!strcmp(domainname, "parent")) {
			char *cp;

			strscpy(ee->tmp, old_domain->domainname->name,
				TOMOYO_EXEC_TMPSIZE);
			cp = strrchr(ee->tmp, ' ');
			if (cp)
				*cp = '\0';
		} else if (*domainname == '<')
			strscpy(ee->tmp, domainname, TOMOYO_EXEC_TMPSIZE);
		else
			snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
				 old_domain->domainname->name, domainname);
		goto force_jump_domain;
	}
	/*
	 * No domain transition preference specified.
	 * Calculate domain to transit to.
	 */
	switch (tomoyo_transition_type(old_domain->ns, old_domain->domainname,
				       candidate)) {
	case TOMOYO_TRANSITION_CONTROL_RESET:
force_reset_domain:
		/* Transit to the root of specified namespace. */
		snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>",
			 candidate->name);
		/*
		 * Make execve() fail if domain transition across namespaces
		 * has failed.
		 */
		reject_on_transition_failure = true;
		break;
	case TOMOYO_TRANSITION_CONTROL_INITIALIZE:
force_initialize_domain:
		/* Transit to the child of current namespace's root. */
		snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
			 old_domain->ns->name, candidate->name);
		break;
	case TOMOYO_TRANSITION_CONTROL_KEEP:
force_keep_domain:
		/* Keep current domain. */
		domain = old_domain;
		break;
	default:
		if (old_domain == &tomoyo_kernel_domain &&
		    !tomoyo_policy_loaded) {
			/*
			 * There is no need to transit from the kernel domain
			 * before starting /sbin/init. But transit from the
			 * kernel domain if executing initializers, because
			 * they might start before /sbin/init.
			 */
			domain = old_domain;
			break;
		}
force_child_domain:
		/* Normal domain transition. */
		snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
			 old_domain->domainname->name, candidate->name);
		break;
	}
force_jump_domain:
	if (!domain)
		domain = tomoyo_assign_domain(ee->tmp, true);
	if (domain)
		retval = 0;
	else if (reject_on_transition_failure) {
		pr_warn("ERROR: Domain '%s' not ready.\n", ee->tmp);
		retval = -ENOMEM;
	} else if (ee->r.mode == TOMOYO_CONFIG_ENFORCING)
		retval = -ENOMEM;
	else {
		retval = 0;
		if (!old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED]) {
			old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED] = true;
			ee->r.granted = false;
			tomoyo_write_log(&ee->r, "%s",
					 tomoyo_dif[TOMOYO_DIF_TRANSITION_FAILED]);
			pr_warn("ERROR: Domain '%s' not defined.\n", ee->tmp);
		}
	}
out:
	if (!domain)
		domain = old_domain;
	/*
	 * Update reference count on "struct tomoyo_domain_info".
	 */
	{
		struct tomoyo_task *s = tomoyo_task(current);

		s->old_domain_info = s->domain_info;
		s->domain_info = domain;
		atomic_inc(&domain->users);
	}
	kfree(exename.name);
	if (!retval) {
		ee->r.domain = domain;
		retval = tomoyo_environ(ee);
	}
	kfree(ee->tmp);
	kfree(ee->dump.data);
	kfree(ee);
	return retval;
}

/**
 * tomoyo_dump_page - Dump a page to buffer.
 *
 * @bprm: Pointer to "struct linux_binprm".
 * @pos:  Location to dump.
 * @dump: Pointer to "struct tomoyo_page_dump".
 *
 * Returns true on success, false otherwise.
 */
bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
		      struct tomoyo_page_dump *dump)
{
	struct page *page;
#ifdef CONFIG_MMU
	int ret;
#endif

	/* dump->data is released by tomoyo_find_next_domain(). */
	if (!dump->data) {
		dump->data = kzalloc(PAGE_SIZE, GFP_NOFS);
		if (!dump->data)
			return false;
	}
	/* Same as get_arg_page(bprm, pos, 0) in fs/exec.c */
#ifdef CONFIG_MMU
	/*
	 * This is called at execve() time in order to dig around
	 * in the argv/environment of the new process
	 * (represented by bprm).
	 */
	mmap_read_lock(bprm->mm);
	ret = get_user_pages_remote(bprm->mm, pos, 1, FOLL_FORCE, &page,
				    NULL);
	mmap_read_unlock(bprm->mm);
	if (ret <= 0)
		return false;
#else
	page = bprm->page[pos / PAGE_SIZE];
#endif
	if (page != dump->page) {
		const unsigned int offset = pos % PAGE_SIZE;
		/*
		 * Maybe kmap()/kunmap() should be used here.
		 * But remove_arg_zero() uses kmap_atomic()/kunmap_atomic().
		 * So do I.
		 */
		char *kaddr = kmap_atomic(page);

		dump->page = page;
		memcpy(dump->data + offset, kaddr + offset,
		       PAGE_SIZE - offset);
		kunmap_atomic(kaddr);
	}
	/* Same as put_arg_page(page) in fs/exec.c */
#ifdef CONFIG_MMU
	put_page(page);
#endif
	return true;
}
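
/*
 * Illustrative note (not from the original file): given the parsing in
 * tomoyo_write_transition_control() above, an exception policy line such as
 *
 *     initialize_domain /usr/sbin/sshd from any
 *
 * stores e.program = "/usr/sbin/sshd" and leaves e.domainname NULL (the
 * "any" token acts as a wildcard). Assuming the "initialize_domain" keyword
 * maps to TOMOYO_TRANSITION_CONTROL_INITIALIZE, as its name suggests,
 * tomoyo_transition_type() would then report an initializing transition
 * whichever domain executes that program.
 */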
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Sonix sn9c201 sn9c202 library
 *
 * Copyright (C) 2012 Jean-Francois Moine <http://moinejf.free.fr>
 * Copyright (C) 2008-2009 microdia project <microdia@googlegroups.com>
 * Copyright (C) 2009 Brian Johnson <brijohn@gmail.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/input.h>
#include "gspca.h"
#include "jpeg.h"

#include <linux/dmi.h>

MODULE_AUTHOR("Brian Johnson <brijohn@gmail.com>, microdia project <microdia@googlegroups.com>");
MODULE_DESCRIPTION("GSPCA/SN9C20X USB Camera Driver");
MODULE_LICENSE("GPL");

/*
 * Pixel format private data
 */
#define SCALE_MASK	0x0f
#define SCALE_160x120	0
#define SCALE_320x240	1
#define SCALE_640x480	2
#define SCALE_1280x1024	3
#define MODE_RAW	0x10
#define MODE_JPEG	0x20
#define MODE_SXGA	0x80

#define SENSOR_OV9650	0
#define SENSOR_OV9655	1
#define SENSOR_SOI968	2
#define SENSOR_OV7660	3
#define SENSOR_OV7670	4
#define SENSOR_MT9V011	5
#define SENSOR_MT9V111	6
#define SENSOR_MT9V112	7
#define SENSOR_MT9M001	8
#define SENSOR_MT9M111	9
#define SENSOR_MT9M112	10
#define SENSOR_HV7131R	11
#define SENSOR_MT9VPRB	12

/* camera flags */
#define HAS_NO_BUTTON	0x1
#define LED_REVERSE	0x2 /* some cameras unset gpio to turn on leds */
#define FLIP_DETECT	0x4
#define HAS_LED_TORCH	0x8

/* specific webcam descriptor */
struct sd {
	struct gspca_dev gspca_dev;

	struct { /* color control cluster */
		struct v4l2_ctrl *brightness;
		struct v4l2_ctrl *contrast;
		struct v4l2_ctrl *saturation;
		struct v4l2_ctrl *hue;
	};
	struct { /* blue/red balance control cluster */
		struct v4l2_ctrl *blue;
		struct v4l2_ctrl *red;
	};
	struct { /* h/vflip control cluster */
		struct v4l2_ctrl *hflip;
		struct v4l2_ctrl *vflip;
	};
	struct v4l2_ctrl *gamma;
	struct { /* autogain and exposure or gain control cluster */
		struct v4l2_ctrl *autogain;
		struct v4l2_ctrl *exposure;
		struct v4l2_ctrl *gain;
	};
	struct v4l2_ctrl *jpegqual;
	struct v4l2_ctrl *led_mode;

	struct work_struct work;

	u32 pktsz;			/* (used by pkt_scan) */
	u16 npkt;
	s8 nchg;
	u8 fmt;				/* (used for JPEG QTAB update) */

#define MIN_AVG_LUM 80
#define MAX_AVG_LUM 130
	atomic_t avg_lum;
	u8 old_step;
	u8 older_step;
	u8 exposure_step;

	u8 i2c_addr;
	u8 i2c_intf;
	u8 sensor;
	u8 hstart;
	u8 vstart;

	u8 jpeg_hdr[JPEG_HDR_SZ];

	u8 flags;
};

static void qual_upd(struct work_struct *work);

struct i2c_reg_u8 {
	u8 reg;
	u8 val;
};

struct i2c_reg_u16 {
	u8 reg;
	u16 val;
};

static const struct dmi_system_id flip_dmi_table[] = {
	{
		.ident = "MSI MS-1034",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT'L CO.,LTD."),
			DMI_MATCH(DMI_PRODUCT_NAME, "MS-1034"),
			DMI_MATCH(DMI_PRODUCT_VERSION, "0341")
		}
	},
	{
		.ident = "MSI MS-1039",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT'L CO.,LTD."),
			DMI_MATCH(DMI_PRODUCT_NAME, "MS-1039"),
		}
	},
	{
		.ident = "MSI MS-1632",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "MSI"),
			DMI_MATCH(DMI_BOARD_NAME, "MS-1632")
		}
	},
	{
		.ident = "MSI MS-1633X",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "MSI"),
			DMI_MATCH(DMI_BOARD_NAME, "MS-1633X")
		}
	},
	{
		.ident = "MSI MS-1635X",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "MSI"),
			DMI_MATCH(DMI_BOARD_NAME, "MS-1635X")
		}
	},
	{
		.ident = "ASUSTeK W7J",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."),
			DMI_MATCH(DMI_BOARD_NAME, "W7J ")
		}
	},
	{}
};
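
/*
 * Note: in the frame format tables below, .priv packs one of the SCALE_*
 * values together with optional MODE_RAW/MODE_JPEG/MODE_SXGA flags; entries
 * carrying no MODE_* flag are delivered as SN9C20X_I420.
 */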
static const struct v4l2_pix_format vga_mode[] = {
	{160, 120, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = SCALE_160x120 | MODE_JPEG},
	{160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_160x120 | MODE_RAW},
	{160, 120, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 240 * 120,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_160x120},
	{320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = SCALE_320x240 | MODE_JPEG},
	{320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_320x240 | MODE_RAW},
	{320, 240, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 480 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_320x240},
	{640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = SCALE_640x480 | MODE_JPEG},
	{640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_640x480 | MODE_RAW},
	{640, 480, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 960 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_640x480},
};

static const struct v4l2_pix_format sxga_mode[] = {
	{160, 120, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = SCALE_160x120 | MODE_JPEG},
	{160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_160x120 | MODE_RAW},
	{160, 120, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 240 * 120,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_160x120},
	{320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = SCALE_320x240 | MODE_JPEG},
	{320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_320x240 | MODE_RAW},
	{320, 240, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 480 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_320x240},
	{640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = SCALE_640x480 | MODE_JPEG},
	{640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_640x480 | MODE_RAW},
	{640, 480, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 960 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_640x480},
	{1280, 1024, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 1280,
		.sizeimage = 1280 * 1024,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_1280x1024 | MODE_RAW | MODE_SXGA},
};

static const struct v4l2_pix_format mono_mode[] = {
	{160, 120, V4L2_PIX_FMT_GREY, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_160x120 | MODE_RAW},
	{320, 240,
		V4L2_PIX_FMT_GREY, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_320x240 | MODE_RAW},
	{640, 480, V4L2_PIX_FMT_GREY, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_640x480 | MODE_RAW},
	{1280, 1024, V4L2_PIX_FMT_GREY, V4L2_FIELD_NONE,
		.bytesperline = 1280,
		.sizeimage = 1280 * 1024,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = SCALE_1280x1024 | MODE_RAW | MODE_SXGA},
};

static const s16 hsv_red_x[] = {
	41, 44, 46, 48, 50, 52, 54, 56, 58, 60,
	62, 64, 66, 68, 70, 72, 74, 76, 78, 80,
	81, 83, 85, 87, 88, 90, 92, 93, 95, 97,
	98, 100, 101, 102, 104, 105, 107, 108, 109, 110,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
	122, 123, 123, 124, 125, 125, 126, 127, 127, 128,
	128, 129, 129, 129, 130, 130, 130, 130, 131, 131,
	131, 131, 131, 131, 131, 131, 130, 130, 130, 130,
	129, 129, 129, 128, 128, 127, 127, 126, 125, 125,
	124, 123, 122, 122, 121, 120, 119, 118, 117, 116,
	115, 114, 112, 111, 110, 109, 107, 106, 105, 103,
	102, 101, 99, 98, 96, 94, 93, 91, 90, 88,
	86, 84, 83, 81, 79, 77, 75, 74, 72, 70,
	68, 66, 64, 62, 60, 58, 56, 54, 52, 49,
	47, 45, 43, 41, 39, 36, 34, 32, 30, 28,
	25, 23, 21, 19, 16, 14, 12, 9, 7, 5,
	3, 0, -1, -3, -6, -8, -10, -12, -15, -17,
	-19, -22, -24, -26, -28, -30, -33, -35, -37, -39,
	-41, -44, -46, -48, -50, -52, -54, -56, -58, -60,
	-62, -64, -66, -68, -70, -72, -74, -76, -78, -80,
	-81, -83, -85, -87, -88, -90, -92, -93, -95, -97,
	-98, -100, -101, -102, -104, -105, -107, -108, -109, -110,
	-112, -113, -114, -115, -116, -117, -118, -119, -120, -121,
	-122, -123, -123, -124, -125, -125, -126, -127, -127, -128,
	-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
	-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
	-128, -128, -128, -128, -128, -127, -127, -126, -125, -125,
	-124, -123, -122, -122, -121, -120, -119, -118, -117, -116,
	-115, -114, -112, -111, -110, -109, -107, -106, -105, -103,
	-102, -101, -99, -98, -96, -94, -93, -91, -90, -88,
	-86, -84, -83, -81, -79, -77, -75, -74, -72, -70,
	-68, -66, -64, -62, -60, -58, -56, -54, -52, -49,
	-47, -45, -43, -41, -39, -36, -34, -32, -30, -28,
	-25, -23, -21, -19, -16, -14, -12, -9, -7, -5,
	-3, 0, 1, 3, 6, 8, 10, 12, 15, 17,
	19, 22, 24, 26, 28, 30, 33, 35, 37, 39,
	41
};

static const s16 hsv_red_y[] = {
	82, 80, 78, 76, 74, 73, 71, 69, 67, 65,
	63, 61, 58, 56, 54, 52, 50, 48, 46, 44,
	41, 39, 37, 35, 32, 30, 28, 26, 23, 21,
	19, 16, 14, 12, 10, 7, 5, 3, 0, -1,
	-3, -6, -8, -10, -13, -15, -17, -19, -22, -24,
	-26, -29, -31, -33, -35, -38, -40, -42, -44, -46,
	-48, -51, -53, -55, -57, -59, -61, -63, -65, -67,
	-69, -71, -73, -75, -77, -79, -81, -82, -84, -86,
	-88, -89, -91, -93, -94, -96, -98, -99, -101, -102,
	-104, -105, -106, -108, -109, -110, -112, -113, -114, -115,
	-116, -117, -119, -120, -120, -121, -122, -123, -124, -125,
	-126, -126, -127, -128, -128, -128, -128, -128, -128, -128,
	-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
	-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
	-128, -128, -128, -128, -127, -127, -126, -125, -125, -124,
	-123, -122, -121, -120, -119, -118, -117, -116, -115, -114,
	-113, -111, -110, -109, -107, -106, -105, -103, -102, -100,
	-99, -97, -96, -94, -92, -91, -89, -87, -85, -84,
	-82, -80, -78, -76, -74, -73, -71, -69, -67, -65,
	-63, -61, -58, -56, -54, -52, -50, -48, -46, -44,
	-41, -39, -37, -35, -32, -30, -28, -26, -23, -21,
	-19, -16, -14, -12, -10, -7, -5, -3, 0, 1,
	3, 6, 8, 10, 13, 15, 17, 19, 22, 24,
	26, 29, 31, 33, 35, 38, 40, 42, 44, 46,
	48, 51, 53,
	55, 57, 59, 61, 63, 65, 67, 69, 71, 73,
	75, 77, 79, 81, 82, 84, 86, 88, 89, 91,
	93, 94, 96, 98, 99, 101, 102, 104, 105, 106,
	108, 109, 110, 112, 113, 114, 115, 116, 117, 119,
	120, 120, 121, 122, 123, 124, 125, 126, 126, 127,
	128, 128, 129, 129, 130, 130, 131, 131, 131, 131,
	132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
	132, 132, 131, 131, 131, 130, 130, 130, 129, 129,
	128, 127, 127, 126, 125, 125, 124, 123, 122, 121,
	120, 119, 118, 117, 116, 115, 114, 113, 111, 110,
	109, 107, 106, 105, 103, 102, 100, 99, 97, 96,
	94, 92, 91, 89, 87, 85, 84, 82
};

static const s16 hsv_green_x[] = {
	-124, -124, -125, -125, -125, -125, -125, -125, -125, -126,
	-126, -125, -125, -125, -125, -125, -125, -124, -124, -124,
	-123, -123, -122, -122, -121, -121, -120, -120, -119, -118,
	-117, -117, -116, -115, -114, -113, -112, -111, -110, -109,
	-108, -107, -105, -104, -103, -102, -100, -99, -98, -96,
	-95, -93, -92, -91, -89, -87, -86, -84, -83, -81,
	-79, -77, -76, -74, -72, -70, -69, -67, -65, -63,
	-61, -59, -57, -55, -53, -51, -49, -47, -45, -43,
	-41, -39, -37, -35, -33, -30, -28, -26, -24, -22,
	-20, -18, -15, -13, -11, -9, -7, -4, -2, 0,
	1, 3, 6, 8, 10, 12, 14, 17, 19, 21,
	23, 25, 27, 29, 32, 34, 36, 38, 40, 42,
	44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
	64, 66, 68, 70, 71, 73, 75, 77, 78, 80,
	82, 83, 85, 87, 88, 90, 91, 93, 94, 96,
	97, 98, 100, 101, 102, 104, 105, 106, 107, 108,
	109, 111, 112, 113, 113, 114, 115, 116, 117, 118,
	118, 119, 120, 120, 121, 122, 122, 123, 123, 124,
	124, 124, 125, 125, 125, 125, 125, 125, 125, 126,
	126, 125, 125, 125, 125, 125, 125, 124, 124, 124,
	123, 123, 122, 122, 121, 121, 120, 120, 119, 118,
	117, 117, 116, 115, 114, 113, 112, 111, 110, 109,
	108, 107, 105, 104, 103, 102, 100, 99, 98, 96,
	95, 93, 92, 91, 89, 87, 86, 84, 83, 81,
	79, 77, 76, 74, 72, 70, 69, 67, 65, 63,
	61, 59, 57, 55, 53, 51, 49, 47, 45, 43,
	41, 39, 37, 35, 33, 30, 28, 26, 24, 22,
	20, 18, 15, 13, 11, 9, 7, 4, 2, 0,
	-1, -3, -6, -8, -10, -12, -14, -17, -19, -21,
	-23, -25, -27, -29, -32, -34, -36, -38, -40, -42,
	-44, -46, -48, -50, -52, -54, -56, -58, -60, -62,
	-64, -66, -68, -70, -71, -73, -75, -77, -78, -80,
	-82, -83, -85, -87, -88, -90, -91, -93, -94, -96,
	-97, -98, -100, -101, -102, -104, -105, -106, -107, -108,
	-109, -111, -112, -113, -113, -114, -115, -116, -117, -118,
	-118, -119, -120, -120, -121, -122, -122, -123, -123, -124,
	-124
};

static const s16 hsv_green_y[] = {
	-100, -99, -98, -97, -95, -94, -93, -91, -90, -89,
	-87, -86, -84, -83, -81, -80, -78, -76, -75, -73,
	-71, -70, -68, -66, -64, -63, -61, -59, -57, -55,
	-53, -51, -49, -48, -46, -44, -42, -40, -38, -36,
	-34, -32, -30, -27, -25, -23, -21, -19, -17, -15,
	-13, -11, -9, -7, -4, -2, 0, 1, 3, 5,
	7, 9, 11, 14, 16, 18, 20, 22, 24, 26,
	28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
	48, 50, 52, 54, 56, 58, 59, 61, 63, 65,
	67, 68, 70, 72, 74, 75, 77, 78, 80, 82,
	83, 85, 86, 88, 89, 90, 92, 93, 95, 96,
	97, 98, 100, 101, 102, 103, 104, 105, 106, 107,
	108, 109, 110, 111, 112, 112, 113, 114, 115, 115,
	116, 116, 117, 117, 118, 118, 119, 119, 119, 120,
	120, 120, 120, 120, 121, 121, 121, 121, 121, 121,
	120, 120, 120, 120, 120, 119, 119, 119, 118, 118,
	117, 117, 116, 116, 115, 114, 114, 113, 112, 111,
	111, 110, 109, 108, 107, 106, 105, 104, 103, 102,
	100, 99, 98, 97, 95, 94, 93, 91, 90, 89,
	87, 86, 84, 83, 81, 80, 78, 76, 75, 73,
	71, 70, 68, 66, 64, 63, 61, 59, 57, 55,
	53, 51, 49, 48, 46, 44, 42, 40, 38, 36,
	34, 32, 30, 27, 25, 23, 21, 19, 17, 15,
	13, 11, 9, 7, 4, 2, 0, -1, -3, -5,
	-7, -9, -11, -14, -16, -18, -20, -22, -24, -26,
	-28, -30, -32, -34, -36, -38, -40, -42, -44, -46,
	-48, -50, -52, -54, -56, -58, -59, -61, -63, -65,
	-67, -68, -70, -72, -74, -75, -77, -78, -80, -82,
	-83, -85, -86, -88, -89, -90, -92, -93, -95, -96,
	-97, -98, -100, -101, -102, -103, -104, -105, -106, -107,
	-108, -109, -110, -111, -112, -112, -113, -114, -115, -115,
	-116, -116, -117, -117, -118, -118, -119, -119, -119, -120,
	-120, -120, -120, -120, -121, -121, -121, -121, -121, -121,
	-120, -120, -120, -120, -120, -119, -119, -119, -118, -118,
	-117, -117, -116, -116, -115, -114, -114, -113, -112, -111,
	-111, -110, -109, -108, -107, -106, -105, -104, -103, -102,
	-100
};

static const s16 hsv_blue_x[] = {
	112, 113, 114, 114, 115, 116, 117, 117, 118, 118,
	119, 119, 120, 120, 120, 121, 121, 121, 122, 122,
	122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
	121, 121, 121, 120, 120, 120, 119, 119, 118, 118,
	117, 116, 116, 115, 114, 113, 113, 112, 111, 110,
	109, 108, 107, 106, 105, 104, 103, 102, 100, 99,
	98, 97, 95, 94, 93, 91, 90, 88, 87, 85,
	84, 82, 80, 79, 77, 76, 74, 72, 70, 69,
	67, 65, 63, 61, 60, 58, 56, 54, 52, 50,
	48, 46, 44, 42, 40, 38, 36, 34, 32, 30,
	28, 26, 24, 22, 19, 17, 15, 13, 11, 9,
	7, 5, 2, 0, -1, -3, -5, -7, -9, -12,
	-14, -16, -18, -20, -22, -24, -26, -28, -31, -33,
	-35, -37, -39, -41, -43, -45, -47, -49, -51, -53,
	-54, -56, -58, -60, -62, -64, -66, -67, -69, -71,
	-73, -74, -76, -78, -79, -81, -83, -84, -86, -87,
	-89, -90, -92, -93, -94, -96, -97, -98, -99, -101,
	-102, -103, -104, -105, -106, -107, -108, -109, -110, -111,
	-112, -113, -114, -114, -115, -116, -117, -117, -118, -118,
	-119, -119, -120, -120, -120, -121, -121, -121, -122, -122,
	-122, -122, -122, -122, -122, -122, -122, -122, -122, -122,
	-121, -121, -121, -120, -120, -120, -119, -119, -118, -118,
	-117, -116, -116, -115, -114, -113, -113, -112, -111, -110,
	-109, -108, -107, -106, -105, -104, -103, -102, -100, -99,
	-98, -97, -95, -94, -93, -91, -90, -88, -87, -85,
	-84, -82, -80, -79, -77, -76, -74, -72, -70, -69,
	-67, -65, -63, -61, -60, -58, -56, -54, -52, -50,
	-48, -46, -44, -42, -40, -38, -36, -34, -32, -30,
	-28, -26, -24, -22, -19, -17, -15, -13, -11, -9,
	-7, -5, -2, 0, 1, 3, 5, 7, 9, 12,
	14, 16, 18, 20, 22, 24, 26, 28, 31, 33,
	35, 37, 39, 41, 43, 45, 47, 49, 51, 53,
	54, 56, 58, 60, 62, 64, 66, 67, 69, 71,
	73, 74, 76, 78, 79, 81, 83, 84, 86, 87,
	89, 90, 92, 93, 94, 96, 97, 98, 99, 101,
	102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112
};

static const s16 hsv_blue_y[] = {
	-11, -13, -15, -17, -19, -21, -23, -25, -27, -29,
	-31, -33, -35, -37, -39, -41, -43, -45, -46, -48,
	-50, -52, -54, -55, -57, -59, -61, -62, -64, -66,
	-67, -69, -71, -72, -74, -75, -77, -78, -80, -81,
	-83, -84, -86, -87, -88, -90, -91, -92, -93, -95,
	-96, -97, -98, -99, -100, -101, -102, -103, -104, -105,
	-106, -106, -107, -108, -109, -109, -110, -111, -111, -112,
	-112, -113, -113, -114, -114, -114, -115, -115, -115, -115,
	-116, -116, -116, -116, -116, -116, -116, -116, -116, -115,
	-115, -115, -115, -114, -114, -114, -113, -113, -112, -112,
	-111, -111, -110, -110, -109, -108, -108, -107, -106, -105,
	-104, -103, -102, -101, -100, -99, -98, -97, -96, -95,
	-94, -93, -91, -90, -89, -88, -86, -85, -84, -82,
	-81, -79, -78, -76, -75, -73, -71, -70, -68, -67,
	-65, -63, -62, -60, -58, -56, -55, -53, -51, -49,
	-47, -45, -44, -42, -40, -38, -36, -34, -32, -30,
	-28, -26, -24, -22, -20, -18, -16, -14, -12, -10,
	-8, -6, -4, -2, 0, 1, 3, 5, 7, 9,
	11, 13, 15, 17, 19, 21, 23, 25, 27, 29,
	31, 33, 35, 37, 39, 41, 43, 45, 46, 48,
	50, 52, 54, 55, 57, 59, 61, 62, 64, 66,
	67, 69, 71, 72, 74, 75, 77, 78, 80, 81,
	83, 84, 86, 87, 88, 90, 91, 92, 93, 95,
	96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 106, 107, 108,
	109, 109, 110, 111, 111, 112, 112, 113, 113, 114,
	114, 114, 115, 115, 115, 115, 116, 116, 116, 116,
	116, 116, 116, 116, 116, 115, 115, 115, 115, 114,
	114, 114, 113, 113, 112, 112, 111, 111, 110, 110,
	109, 108, 108, 107, 106, 105, 104, 103, 102, 101,
	100, 99, 98, 97, 96, 95, 94, 93, 91, 90,
	89, 88, 86, 85, 84, 82, 81, 79, 78, 76,
	75, 73, 71, 70, 68, 67, 65, 63, 62, 60,
	58, 56, 55, 53, 51, 49, 47, 45, 44, 42,
	40, 38, 36, 34, 32, 30, 28, 26, 24, 22,
	20, 18, 16, 14, 12, 10, 8, 6, 4, 2,
	0, -1, -3, -5, -7, -9, -11
};

static const u16 bridge_init[][2] = {
	{0x1000, 0x78}, {0x1001, 0x40}, {0x1002, 0x1c},
	{0x1020, 0x80}, {0x1061, 0x01}, {0x1067, 0x40},
	{0x1068, 0x30}, {0x1069, 0x20}, {0x106a, 0x10},
	{0x106b, 0x08}, {0x1188, 0x87}, {0x11a1, 0x00},
	{0x11a2, 0x00}, {0x11a3, 0x6a}, {0x11a4, 0x50},
	{0x11ab, 0x00}, {0x11ac, 0x00}, {0x11ad, 0x50},
	{0x11ae, 0x3c}, {0x118a, 0x04}, {0x0395, 0x04},
	{0x11b8, 0x3a}, {0x118b, 0x0e}, {0x10f7, 0x05},
	{0x10f8, 0x14}, {0x10fa, 0xff}, {0x10f9, 0x00},
	{0x11ba, 0x0a}, {0x11a5, 0x2d}, {0x11a6, 0x2d},
	{0x11a7, 0x3a}, {0x11a8, 0x05}, {0x11a9, 0x04},
	{0x11aa, 0x3f}, {0x11af, 0x28}, {0x11b0, 0xd8},
	{0x11b1, 0x14}, {0x11b2, 0xec}, {0x11b3, 0x32},
	{0x11b4, 0xdd}, {0x11b5, 0x32}, {0x11b6, 0xdd},
	{0x10e0, 0x2c}, {0x11bc, 0x40}, {0x11bd, 0x01},
	{0x11be, 0xf0}, {0x11bf, 0x00}, {0x118c, 0x1f},
	{0x118d, 0x1f}, {0x118e, 0x1f}, {0x118f, 0x1f},
	{0x1180, 0x01}, {0x1181, 0x00}, {0x1182, 0x01},
	{0x1183, 0x00}, {0x1184, 0x50}, {0x1185, 0x80},
	{0x1007, 0x00}
};

/* Gain = (bit[3:0] / 16 + 1) * (bit[4] + 1) * (bit[5] + 1) * (bit[6] + 1) */
static const u8 ov_gain[] = {
	0x00 /* 1x */, 0x04 /* 1.25x */, 0x08 /* 1.5x */, 0x0c /* 1.75x */,
	0x10 /* 2x */, 0x12 /* 2.25x */, 0x14 /* 2.5x */, 0x16 /* 2.75x */,
	0x18 /* 3x */, 0x1a /* 3.25x */, 0x1c /* 3.5x */, 0x1e /* 3.75x */,
	0x30 /* 4x */, 0x31 /* 4.25x */, 0x32 /* 4.5x */, 0x33 /* 4.75x */,
	0x34 /* 5x */, 0x35 /* 5.25x */, 0x36 /* 5.5x */, 0x37 /* 5.75x */,
	0x38 /* 6x */, 0x39 /* 6.25x */, 0x3a /* 6.5x */, 0x3b /* 6.75x */,
	0x3c /* 7x */, 0x3d /* 7.25x */, 0x3e /* 7.5x */, 0x3f /* 7.75x */,
	0x70 /* 8x */
};

/* Gain = (bit[8] + 1) * (bit[7] + 1) * (bit[6:0] * 0.03125) */
static const u16 micron1_gain[] = {
	/* 1x   1.25x   1.5x    1.75x */
	0x0020, 0x0028, 0x0030, 0x0038,
	/* 2x   2.25x   2.5x    2.75x */
	0x00a0, 0x00a4, 0x00a8, 0x00ac,
	/* 3x   3.25x   3.5x    3.75x */
	0x00b0, 0x00b4, 0x00b8, 0x00bc,
	/* 4x   4.25x   4.5x    4.75x */
	0x00c0, 0x00c4, 0x00c8, 0x00cc,
	/* 5x   5.25x   5.5x    5.75x */
	0x00d0, 0x00d4, 0x00d8, 0x00dc,
	/* 6x   6.25x   6.5x    6.75x */
	0x00e0, 0x00e4, 0x00e8, 0x00ec,
	/* 7x   7.25x   7.5x    7.75x */
	0x00f0, 0x00f4, 0x00f8, 0x00fc,
	/* 8x */
	0x01c0
};

/* mt9m001 sensor uses a different gain formula than other micron sensors */
/* Gain = (bit[6] + 1) * (bit[5-0] * 0.125) */
static const u16 micron2_gain[] = {
	/* 1x   1.25x   1.5x    1.75x */
	0x0008, 0x000a, 0x000c, 0x000e,
	/* 2x   2.25x   2.5x    2.75x */
	0x0010, 0x0012, 0x0014, 0x0016,
	/* 3x   3.25x   3.5x    3.75x */
	0x0018, 0x001a, 0x001c, 0x001e,
	/* 4x   4.25x   4.5x    4.75x */
	0x0020, 0x0051, 0x0052, 0x0053,
	/* 5x   5.25x   5.5x    5.75x */
	0x0054, 0x0055, 0x0056, 0x0057,
	/* 6x   6.25x   6.5x    6.75x */
	0x0058, 0x0059, 0x005a, 0x005b,
	/* 7x   7.25x   7.5x    7.75x */
	0x005c, 0x005d, 0x005e, 0x005f,
	/* 8x */
	0x0060
};

/* Gain = .5 + bit[7:0] / 16 */
static const u8 hv7131r_gain[] = {
	0x08 /* 1x */, 0x0c /* 1.25x */, 0x10 /* 1.5x */, 0x14 /* 1.75x */,
	0x18 /* 2x */, 0x1c /* 2.25x */, 0x20 /* 2.5x */, 0x24 /* 2.75x */,
	0x28 /* 3x */, 0x2c /* 3.25x */, 0x30 /* 3.5x */, 0x34 /* 3.75x */,
	0x38 /* 4x */, 0x3c /* 4.25x */,
	0x40 /* 4.5x */, 0x44 /* 4.75x */, 0x48 /* 5x */, 0x4c /* 5.25x */,
	0x50 /* 5.5x */, 0x54 /* 5.75x */, 0x58 /* 6x */, 0x5c /* 6.25x */,
	0x60 /* 6.5x */, 0x64 /* 6.75x */, 0x68 /* 7x */, 0x6c /* 7.25x */,
	0x70 /* 7.5x */, 0x74 /* 7.75x */, 0x78 /* 8x */
};

static const struct i2c_reg_u8 soi968_init[] = {
	{0x0c, 0x00}, {0x0f, 0x1f}, {0x11, 0x80}, {0x38, 0x52},
	{0x1e, 0x00}, {0x33, 0x08}, {0x35, 0x8c}, {0x36, 0x0c},
	{0x37, 0x04}, {0x45, 0x04}, {0x47, 0xff}, {0x3e, 0x00},
	{0x3f, 0x00}, {0x3b, 0x20}, {0x3a, 0x96}, {0x3d, 0x0a},
	{0x14, 0x8e}, {0x13, 0x8b}, {0x12, 0x40}, {0x17, 0x13},
	{0x18, 0x63}, {0x19, 0x01}, {0x1a, 0x79}, {0x32, 0x24},
	{0x03, 0x00}, {0x11, 0x40}, {0x2a, 0x10}, {0x2b, 0xe0},
	{0x10, 0x32}, {0x00, 0x00}, {0x01, 0x80}, {0x02, 0x80},
};

static const struct i2c_reg_u8 ov7660_init[] = {
	{0x0e, 0x80}, {0x0d, 0x08}, {0x0f, 0xc3}, {0x04, 0xc3},
	{0x10, 0x40}, {0x11, 0x40}, {0x12, 0x05}, {0x13, 0xba},
	{0x14, 0x2a},
	/*
	 * HDG Set hstart and hstop, datasheet default 0x11, 0x61, using
	 * 0x10, 0x61 and sd->hstart, vstart = 3, fixes ugly colored borders
	 */
	{0x17, 0x10}, {0x18, 0x61}, {0x37, 0x0f}, {0x38, 0x02},
	{0x39, 0x43}, {0x3a, 0x00}, {0x69, 0x90}, {0x2d, 0x00},
	{0x2e, 0x00}, {0x01, 0x78}, {0x02, 0x50},
};

static const struct i2c_reg_u8 ov7670_init[] = {
	{0x11, 0x80}, {0x3a, 0x04}, {0x12, 0x01}, {0x32, 0xb6},
	{0x03, 0x0a}, {0x0c, 0x00}, {0x3e, 0x00}, {0x70, 0x3a},
	{0x71, 0x35}, {0x72, 0x11}, {0x73, 0xf0}, {0xa2, 0x02},
	{0x13, 0xe0}, {0x00, 0x00}, {0x10, 0x00}, {0x0d, 0x40},
	{0x14, 0x28}, {0xa5, 0x05}, {0xab, 0x07}, {0x24, 0x95},
	{0x25, 0x33}, {0x26, 0xe3}, {0x9f, 0x75}, {0xa0, 0x65},
	{0xa1, 0x0b}, {0xa6, 0xd8}, {0xa7, 0xd8}, {0xa8, 0xf0},
	{0xa9, 0x90}, {0xaa, 0x94}, {0x13, 0xe5}, {0x0e, 0x61},
	{0x0f, 0x4b}, {0x16, 0x02}, {0x1e, 0x27}, {0x21, 0x02},
	{0x22, 0x91}, {0x29, 0x07}, {0x33, 0x0b}, {0x35, 0x0b},
	{0x37, 0x1d}, {0x38, 0x71}, {0x39, 0x2a}, {0x3c, 0x78},
	{0x4d, 0x40}, {0x4e, 0x20}, {0x69, 0x00}, {0x74, 0x19},
	{0x8d, 0x4f}, {0x8e, 0x00}, {0x8f, 0x00}, {0x90, 0x00},
	{0x91, 0x00}, {0x96, 0x00}, {0x9a, 0x80}, {0xb0, 0x84},
	{0xb1, 0x0c}, {0xb2, 0x0e}, {0xb3, 0x82}, {0xb8, 0x0a},
	{0x43, 0x0a}, {0x44, 0xf0}, {0x45, 0x20}, {0x46, 0x7d},
	{0x47, 0x29}, {0x48, 0x4a}, {0x59, 0x8c}, {0x5a, 0xa5},
	{0x5b, 0xde}, {0x5c, 0x96}, {0x5d, 0x66}, {0x5e, 0x10},
	{0x6c, 0x0a}, {0x6d, 0x55}, {0x6e, 0x11}, {0x6f, 0x9e},
	{0x6a, 0x40}, {0x01, 0x40}, {0x02, 0x40}, {0x13, 0xe7},
	{0x4f, 0x6e}, {0x50, 0x70}, {0x51, 0x02}, {0x52, 0x1d},
	{0x53, 0x56}, {0x54, 0x73}, {0x55, 0x0a}, {0x56, 0x55},
	{0x57, 0x80}, {0x58, 0x9e}, {0x41, 0x08}, {0x3f, 0x02},
	{0x75, 0x03}, {0x76, 0x63}, {0x4c, 0x04}, {0x77, 0x06},
	{0x3d, 0x02}, {0x4b, 0x09}, {0xc9, 0x30}, {0x41, 0x08},
	{0x56, 0x48}, {0x34, 0x11}, {0xa4, 0x88}, {0x96, 0x00},
	{0x97, 0x30}, {0x98, 0x20}, {0x99, 0x30}, {0x9a, 0x84},
	{0x9b, 0x29}, {0x9c, 0x03}, {0x9d, 0x99}, {0x9e, 0x7f},
	{0x78, 0x04}, {0x79, 0x01}, {0xc8, 0xf0}, {0x79, 0x0f},
	{0xc8, 0x00}, {0x79, 0x10}, {0xc8, 0x7e}, {0x79, 0x0a},
	{0xc8, 0x80}, {0x79, 0x0b}, {0xc8, 0x01}, {0x79, 0x0c},
	{0xc8, 0x0f}, {0x79, 0x0d}, {0xc8, 0x20}, {0x79, 0x09},
	{0xc8, 0x80}, {0x79, 0x02}, {0xc8, 0xc0}, {0x79, 0x03},
	{0xc8, 0x40}, {0x79, 0x05}, {0xc8, 0x30}, {0x79, 0x26},
	{0x62, 0x20}, {0x63, 0x00}, {0x64, 0x06}, {0x65, 0x00},
	{0x66, 0x05}, {0x94, 0x05}, {0x95, 0x0a}, {0x17, 0x13},
	{0x18, 0x01}, {0x19, 0x02}, {0x1a, 0x7a}, {0x46, 0x59},
	{0x47, 0x30}, {0x58, 0x9a}, {0x59, 0x84}, {0x5a, 0x91},
	{0x5b, 0x57}, {0x5c, 0x75}, {0x5d, 0x6d}, {0x5e, 0x13},
	{0x64, 0x07}, {0x94, 0x07}, {0x95, 0x0d}, {0xa6, 0xdf},
	{0xa7, 0xdf}, {0x48, 0x4d}, {0x51, 0x00},
	{0x6b, 0x0a}, {0x11, 0x80}, {0x2a, 0x00}, {0x2b, 0x00},
	{0x92, 0x00}, {0x93, 0x00}, {0x55, 0x0a}, {0x56, 0x60},
	{0x4f, 0x6e}, {0x50, 0x70}, {0x51, 0x00}, {0x52, 0x1d},
	{0x53, 0x56}, {0x54, 0x73}, {0x58, 0x9a}, {0x4f, 0x6e},
	{0x50, 0x70}, {0x51, 0x00}, {0x52, 0x1d}, {0x53, 0x56},
	{0x54, 0x73}, {0x58, 0x9a}, {0x3f, 0x01}, {0x7b, 0x03},
	{0x7c, 0x09}, {0x7d, 0x16}, {0x7e, 0x38}, {0x7f, 0x47},
	{0x80, 0x53}, {0x81, 0x5e}, {0x82, 0x6a}, {0x83, 0x74},
	{0x84, 0x80}, {0x85, 0x8c}, {0x86, 0x9b}, {0x87, 0xb2},
	{0x88, 0xcc}, {0x89, 0xe5}, {0x7a, 0x24}, {0x3b, 0x00},
	{0x9f, 0x76}, {0xa0, 0x65}, {0x13, 0xe2}, {0x6b, 0x0a},
	{0x11, 0x80}, {0x2a, 0x00}, {0x2b, 0x00}, {0x92, 0x00},
	{0x93, 0x00},
};

static const struct i2c_reg_u8 ov9650_init[] = {
	{0x00, 0x00}, {0x01, 0x78}, {0x02, 0x78}, {0x03, 0x36},
	{0x04, 0x03}, {0x05, 0x00}, {0x06, 0x00}, {0x08, 0x00},
	{0x09, 0x01}, {0x0c, 0x00}, {0x0d, 0x00}, {0x0e, 0xa0},
	{0x0f, 0x52}, {0x10, 0x7c}, {0x11, 0x80}, {0x12, 0x45},
	{0x13, 0xc2}, {0x14, 0x2e}, {0x15, 0x00}, {0x16, 0x07},
	{0x17, 0x24}, {0x18, 0xc5}, {0x19, 0x00}, {0x1a, 0x3c},
	{0x1b, 0x00}, {0x1e, 0x04}, {0x1f, 0x00}, {0x24, 0x78},
	{0x25, 0x68}, {0x26, 0xd4}, {0x27, 0x80}, {0x28, 0x80},
	{0x29, 0x30}, {0x2a, 0x00}, {0x2b, 0x00}, {0x2c, 0x80},
	{0x2d, 0x00}, {0x2e, 0x00}, {0x2f, 0x00}, {0x30, 0x08},
	{0x31, 0x30}, {0x32, 0x84}, {0x33, 0xe2}, {0x34, 0xbf},
	{0x35, 0x81}, {0x36, 0xf9}, {0x37, 0x00}, {0x38, 0x93},
	{0x39, 0x50}, {0x3a, 0x01}, {0x3b, 0x01}, {0x3c, 0x73},
	{0x3d, 0x19}, {0x3e, 0x0b}, {0x3f, 0x80}, {0x40, 0xc1},
	{0x41, 0x00}, {0x42, 0x08}, {0x67, 0x80}, {0x68, 0x80},
	{0x69, 0x40}, {0x6a, 0x00}, {0x6b, 0x0a}, {0x8b, 0x06},
	{0x8c, 0x20}, {0x8d, 0x00}, {0x8e, 0x00}, {0x8f, 0xdf},
	{0x92, 0x00}, {0x93, 0x00}, {0x94, 0x88}, {0x95, 0x88},
	{0x96, 0x04}, {0xa1, 0x00}, {0xa5, 0x80}, {0xa8, 0x80},
	{0xa9, 0xb8}, {0xaa, 0x92}, {0xab, 0x0a},
};

static const struct i2c_reg_u8 ov9655_init[] = {
	{0x0e, 0x61}, {0x11, 0x80}, {0x13, 0xba}, {0x14, 0x2e},
	{0x16, 0x24}, {0x1e, 0x04}, {0x27, 0x08}, {0x28, 0x08},
	{0x29, 0x15}, {0x2c, 0x08}, {0x34, 0x3d}, {0x35, 0x00},
	{0x38, 0x12}, {0x0f, 0x42}, {0x39, 0x57}, {0x3a, 0x00},
	{0x3b, 0xcc}, {0x3c, 0x0c}, {0x3d, 0x19}, {0x3e, 0x0c},
	{0x3f, 0x01}, {0x41, 0x40}, {0x42, 0x80}, {0x45, 0x46},
	{0x46, 0x62}, {0x47, 0x2a}, {0x48, 0x3c}, {0x4a, 0xf0},
	{0x4b, 0xdc}, {0x4c, 0xdc}, {0x4d, 0xdc}, {0x4e, 0xdc},
	{0x6c, 0x04}, {0x6f, 0x9e}, {0x70, 0x05}, {0x71, 0x78},
	{0x77, 0x02}, {0x8a, 0x23}, {0x90, 0x7e}, {0x91, 0x7c},
	{0x9f, 0x6e}, {0xa0, 0x6e}, {0xa5, 0x68}, {0xa6, 0x60},
	{0xa8, 0xc1}, {0xa9, 0xfa}, {0xaa, 0x92}, {0xab, 0x04},
	{0xac, 0x80}, {0xad, 0x80}, {0xae, 0x80}, {0xaf, 0x80},
	{0xb2, 0xf2}, {0xb3, 0x20}, {0xb5, 0x00}, {0xb6, 0xaf},
	{0xbb, 0xae}, {0xbc, 0x44}, {0xbd, 0x44}, {0xbe, 0x3b},
	{0xbf, 0x3a}, {0xc1, 0xc8}, {0xc2, 0x01}, {0xc4, 0x00},
	{0xc6, 0x85}, {0xc7, 0x81}, {0xc9, 0xe0}, {0xca, 0xe8},
	{0xcc, 0xd8}, {0xcd, 0x93}, {0x2d, 0x00}, {0x2e, 0x00},
	{0x01, 0x80}, {0x02, 0x80}, {0x12, 0x61}, {0x36, 0xfa},
	{0x8c, 0x8d}, {0xc0, 0xaa}, {0x69, 0x0a}, {0x03, 0x09},
	{0x17, 0x16}, {0x18, 0x6e}, {0x19, 0x01}, {0x1a, 0x3e},
	{0x32, 0x09}, {0x2a, 0x10}, {0x2b, 0x0a}, {0x92, 0x00},
	{0x93, 0x00}, {0xa1, 0x00}, {0x10, 0x7c}, {0x04, 0x03},
	{0x00, 0x13},
};

static const struct i2c_reg_u16 mt9v112_init[] = {
	{0xf0, 0x0000}, {0x0d, 0x0021}, {0x0d, 0x0020},
	{0x34, 0xc019}, {0x0a, 0x0011}, {0x0b, 0x000b},
	{0x20, 0x0703}, {0x35, 0x2022}, {0xf0, 0x0001},
	{0x05, 0x0000}, {0x06, 0x340c}, {0x3b, 0x042a},
	{0x3c, 0x0400}, {0xf0, 0x0002}, {0x2e, 0x0c58},
	{0x5b, 0x0001}, {0xc8, 0x9f0b}, {0xf0, 0x0001},
	{0x9b, 0x5300},
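	/* presumably reg 0xf0 selects the register page; back to page 0 */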
{0xf0, 0x0000}, {0x2b, 0x0020}, {0x2c, 0x002a}, {0x2d, 0x0032}, {0x2e, 0x0020}, {0x09, 0x01dc}, {0x01, 0x000c}, {0x02, 0x0020}, {0x03, 0x01e0}, {0x04, 0x0280}, {0x06, 0x000c}, {0x05, 0x0098}, {0x20, 0x0703}, {0x09, 0x01f2}, {0x2b, 0x00a0}, {0x2c, 0x00a0}, {0x2d, 0x00a0}, {0x2e, 0x00a0}, {0x01, 0x000c}, {0x02, 0x0020}, {0x03, 0x01e0}, {0x04, 0x0280}, {0x06, 0x000c}, {0x05, 0x0098}, {0x09, 0x01c1}, {0x2b, 0x00ae}, {0x2c, 0x00ae}, {0x2d, 0x00ae}, {0x2e, 0x00ae}, }; static const struct i2c_reg_u16 mt9v111_init[] = { {0x01, 0x0004}, {0x0d, 0x0001}, {0x0d, 0x0000}, {0x01, 0x0001}, {0x05, 0x0004}, {0x2d, 0xe0a0}, {0x2e, 0x0c64}, {0x2f, 0x0064}, {0x06, 0x600e}, {0x08, 0x0480}, {0x01, 0x0004}, {0x02, 0x0016}, {0x03, 0x01e7}, {0x04, 0x0287}, {0x05, 0x0004}, {0x06, 0x002d}, {0x07, 0x3002}, {0x08, 0x0008}, {0x0e, 0x0008}, {0x20, 0x0000} }; static const struct i2c_reg_u16 mt9v011_init[] = { {0x07, 0x0002}, {0x0d, 0x0001}, {0x0d, 0x0000}, {0x01, 0x0008}, {0x02, 0x0016}, {0x03, 0x01e1}, {0x04, 0x0281}, {0x05, 0x0083}, {0x06, 0x0006}, {0x0d, 0x0002}, {0x0a, 0x0000}, {0x0b, 0x0000}, {0x0c, 0x0000}, {0x0d, 0x0000}, {0x0e, 0x0000}, {0x0f, 0x0000}, {0x10, 0x0000}, {0x11, 0x0000}, {0x12, 0x0000}, {0x13, 0x0000}, {0x14, 0x0000}, {0x15, 0x0000}, {0x16, 0x0000}, {0x17, 0x0000}, {0x18, 0x0000}, {0x19, 0x0000}, {0x1a, 0x0000}, {0x1b, 0x0000}, {0x1c, 0x0000}, {0x1d, 0x0000}, {0x32, 0x0000}, {0x20, 0x1101}, {0x21, 0x0000}, {0x22, 0x0000}, {0x23, 0x0000}, {0x24, 0x0000}, {0x25, 0x0000}, {0x26, 0x0000}, {0x27, 0x0024}, {0x2f, 0xf7b0}, {0x30, 0x0005}, {0x31, 0x0000}, {0x32, 0x0000}, {0x33, 0x0000}, {0x34, 0x0100}, {0x3d, 0x068f}, {0x40, 0x01e0}, {0x41, 0x00d1}, {0x44, 0x0082}, {0x5a, 0x0000}, {0x5b, 0x0000}, {0x5c, 0x0000}, {0x5d, 0x0000}, {0x5e, 0x0000}, {0x5f, 0xa31d}, {0x62, 0x0611}, {0x0a, 0x0000}, {0x06, 0x0029}, {0x05, 0x0009}, {0x20, 0x1101}, {0x20, 0x1101}, {0x09, 0x0064}, {0x07, 0x0003}, {0x2b, 0x0033}, {0x2c, 0x00a0}, {0x2d, 0x00a0}, {0x2e, 0x0033}, {0x07, 0x0002}, {0x06, 0x0000}, {0x06, 0x0029}, {0x05, 0x0009}, }; static const struct i2c_reg_u16 mt9m001_init[] = { {0x0d, 0x0001}, {0x0d, 0x0000}, {0x04, 0x0500}, /* hres = 1280 */ {0x03, 0x0400}, /* vres = 1024 */ {0x20, 0x1100}, {0x06, 0x0010}, {0x2b, 0x0024}, {0x2e, 0x0024}, {0x35, 0x0024}, {0x2d, 0x0020}, {0x2c, 0x0020}, {0x09, 0x0ad4}, {0x35, 0x0057}, }; static const struct i2c_reg_u16 mt9m111_init[] = { {0xf0, 0x0000}, {0x0d, 0x0021}, {0x0d, 0x0008}, {0xf0, 0x0001}, {0x3a, 0x4300}, {0x9b, 0x4300}, {0x06, 0x708e}, {0xf0, 0x0002}, {0x2e, 0x0a1e}, {0xf0, 0x0000}, }; static const struct i2c_reg_u16 mt9m112_init[] = { {0xf0, 0x0000}, {0x0d, 0x0021}, {0x0d, 0x0008}, {0xf0, 0x0001}, {0x3a, 0x4300}, {0x9b, 0x4300}, {0x06, 0x708e}, {0xf0, 0x0002}, {0x2e, 0x0a1e}, {0xf0, 0x0000}, }; static const struct i2c_reg_u8 hv7131r_init[] = { {0x02, 0x08}, {0x02, 0x00}, {0x01, 0x08}, {0x02, 0x00}, {0x20, 0x00}, {0x21, 0xd0}, {0x22, 0x00}, {0x23, 0x09}, {0x01, 0x08}, {0x01, 0x08}, {0x01, 0x08}, {0x25, 0x07}, {0x26, 0xc3}, {0x27, 0x50}, {0x30, 0x62}, {0x31, 0x10}, {0x32, 0x06}, {0x33, 0x10}, {0x20, 0x00}, {0x21, 0xd0}, {0x22, 0x00}, {0x23, 0x09}, {0x01, 0x08}, }; static void reg_r(struct gspca_dev *gspca_dev, u16 reg, u16 length) { struct usb_device *dev = gspca_dev->dev; int result; if (gspca_dev->usb_err < 0) return; result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, reg, 0x00, gspca_dev->usb_buf, length, 500); if (unlikely(result < 0 || result != length)) { pr_err("Read register %02x failed %d\n", reg, result); 
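/*
 * A failed transfer latches the error in gspca_dev->usb_err; the
 * register and i2c helpers in this file (reg_r, reg_w, i2c_w, ...)
 * all return early while it is set, so callers can issue a whole
 * sequence of transfers and check for failure once at the end.
 */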
gspca_dev->usb_err = result; /* * Make sure the buffer is zeroed to avoid uninitialized * values. */ memset(gspca_dev->usb_buf, 0, USB_BUF_SZ); } } static void reg_w(struct gspca_dev *gspca_dev, u16 reg, const u8 *buffer, int length) { struct usb_device *dev = gspca_dev->dev; int result; if (gspca_dev->usb_err < 0) return; memcpy(gspca_dev->usb_buf, buffer, length); result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x08, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, reg, 0x00, gspca_dev->usb_buf, length, 500); if (unlikely(result < 0 || result != length)) { pr_err("Write register %02x failed %d\n", reg, result); gspca_dev->usb_err = result; } } static void reg_w1(struct gspca_dev *gspca_dev, u16 reg, const u8 value) { reg_w(gspca_dev, reg, &value, 1); } static void i2c_w(struct gspca_dev *gspca_dev, const u8 *buffer) { int i; reg_w(gspca_dev, 0x10c0, buffer, 8); for (i = 0; i < 5; i++) { reg_r(gspca_dev, 0x10c0, 1); if (gspca_dev->usb_err < 0) return; if (gspca_dev->usb_buf[0] & 0x04) { if (gspca_dev->usb_buf[0] & 0x08) { pr_err("i2c_w error\n"); gspca_dev->usb_err = -EIO; } return; } msleep(10); } pr_err("i2c_w reg %02x no response\n", buffer[2]); /* gspca_dev->usb_err = -EIO; fixme: may occur */ } static void i2c_w1(struct gspca_dev *gspca_dev, u8 reg, u8 val) { struct sd *sd = (struct sd *) gspca_dev; u8 row[8]; /* * from the point of view of the bridge, the length * includes the address */ row[0] = sd->i2c_intf | (2 << 4); row[1] = sd->i2c_addr; row[2] = reg; row[3] = val; row[4] = 0x00; row[5] = 0x00; row[6] = 0x00; row[7] = 0x10; i2c_w(gspca_dev, row); } static void i2c_w1_buf(struct gspca_dev *gspca_dev, const struct i2c_reg_u8 *buf, int sz) { while (--sz >= 0) { i2c_w1(gspca_dev, buf->reg, buf->val); buf++; } } static void i2c_w2(struct gspca_dev *gspca_dev, u8 reg, u16 val) { struct sd *sd = (struct sd *) gspca_dev; u8 row[8]; /* * from the point of view of the bridge, the length * includes the address */ row[0] = sd->i2c_intf | (3 << 4); row[1] = sd->i2c_addr; row[2] = reg; row[3] = val >> 8; row[4] = val; row[5] = 0x00; row[6] = 0x00; row[7] = 0x10; i2c_w(gspca_dev, row); } static void i2c_w2_buf(struct gspca_dev *gspca_dev, const struct i2c_reg_u16 *buf, int sz) { while (--sz >= 0) { i2c_w2(gspca_dev, buf->reg, buf->val); buf++; } } static void i2c_r1(struct gspca_dev *gspca_dev, u8 reg, u8 *val) { struct sd *sd = (struct sd *) gspca_dev; u8 row[8]; row[0] = sd->i2c_intf | (1 << 4); row[1] = sd->i2c_addr; row[2] = reg; row[3] = 0; row[4] = 0; row[5] = 0; row[6] = 0; row[7] = 0x10; i2c_w(gspca_dev, row); row[0] = sd->i2c_intf | (1 << 4) | 0x02; row[2] = 0; i2c_w(gspca_dev, row); reg_r(gspca_dev, 0x10c2, 5); *val = gspca_dev->usb_buf[4]; } static void i2c_r2(struct gspca_dev *gspca_dev, u8 reg, u16 *val) { struct sd *sd = (struct sd *) gspca_dev; u8 row[8]; row[0] = sd->i2c_intf | (1 << 4); row[1] = sd->i2c_addr; row[2] = reg; row[3] = 0; row[4] = 0; row[5] = 0; row[6] = 0; row[7] = 0x10; i2c_w(gspca_dev, row); row[0] = sd->i2c_intf | (2 << 4) | 0x02; row[2] = 0; i2c_w(gspca_dev, row); reg_r(gspca_dev, 0x10c2, 5); *val = (gspca_dev->usb_buf[3] << 8) | gspca_dev->usb_buf[4]; } static void ov9650_init_sensor(struct gspca_dev *gspca_dev) { u16 id; struct sd *sd = (struct sd *) gspca_dev; i2c_r2(gspca_dev, 0x1c, &id); if (gspca_dev->usb_err < 0) return; if (id != 0x7fa2) { pr_err("sensor id for ov9650 doesn't match (0x%04x)\n", id); gspca_dev->usb_err = -ENODEV; return; } i2c_w1(gspca_dev, 0x12, 0x80); /* sensor reset */ msleep(200); i2c_w1_buf(gspca_dev, ov9650_init, 
ARRAY_SIZE(ov9650_init)); if (gspca_dev->usb_err < 0) pr_err("OV9650 sensor initialization failed\n"); sd->hstart = 1; sd->vstart = 7; } static void ov9655_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w1(gspca_dev, 0x12, 0x80); /* sensor reset */ msleep(200); i2c_w1_buf(gspca_dev, ov9655_init, ARRAY_SIZE(ov9655_init)); if (gspca_dev->usb_err < 0) pr_err("OV9655 sensor initialization failed\n"); sd->hstart = 1; sd->vstart = 2; } static void soi968_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w1(gspca_dev, 0x12, 0x80); /* sensor reset */ msleep(200); i2c_w1_buf(gspca_dev, soi968_init, ARRAY_SIZE(soi968_init)); if (gspca_dev->usb_err < 0) pr_err("SOI968 sensor initialization failed\n"); sd->hstart = 60; sd->vstart = 11; } static void ov7660_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w1(gspca_dev, 0x12, 0x80); /* sensor reset */ msleep(200); i2c_w1_buf(gspca_dev, ov7660_init, ARRAY_SIZE(ov7660_init)); if (gspca_dev->usb_err < 0) pr_err("OV7660 sensor initialization failed\n"); sd->hstart = 3; sd->vstart = 3; } static void ov7670_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w1(gspca_dev, 0x12, 0x80); /* sensor reset */ msleep(200); i2c_w1_buf(gspca_dev, ov7670_init, ARRAY_SIZE(ov7670_init)); if (gspca_dev->usb_err < 0) pr_err("OV7670 sensor initialization failed\n"); sd->hstart = 0; sd->vstart = 1; } static void mt9v_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; u16 value; sd->i2c_addr = 0x5d; i2c_r2(gspca_dev, 0xff, &value); if (gspca_dev->usb_err >= 0 && value == 0x8243) { i2c_w2_buf(gspca_dev, mt9v011_init, ARRAY_SIZE(mt9v011_init)); if (gspca_dev->usb_err < 0) { pr_err("MT9V011 sensor initialization failed\n"); return; } sd->hstart = 2; sd->vstart = 2; sd->sensor = SENSOR_MT9V011; pr_info("MT9V011 sensor detected\n"); return; } gspca_dev->usb_err = 0; sd->i2c_addr = 0x5c; i2c_w2(gspca_dev, 0x01, 0x0004); i2c_r2(gspca_dev, 0xff, &value); if (gspca_dev->usb_err >= 0 && value == 0x823a) { i2c_w2_buf(gspca_dev, mt9v111_init, ARRAY_SIZE(mt9v111_init)); if (gspca_dev->usb_err < 0) { pr_err("MT9V111 sensor initialization failed\n"); return; } sd->hstart = 2; sd->vstart = 2; sd->sensor = SENSOR_MT9V111; pr_info("MT9V111 sensor detected\n"); return; } gspca_dev->usb_err = 0; sd->i2c_addr = 0x5d; i2c_w2(gspca_dev, 0xf0, 0x0000); if (gspca_dev->usb_err < 0) { gspca_dev->usb_err = 0; sd->i2c_addr = 0x48; i2c_w2(gspca_dev, 0xf0, 0x0000); } i2c_r2(gspca_dev, 0x00, &value); if (gspca_dev->usb_err >= 0 && value == 0x1229) { i2c_w2_buf(gspca_dev, mt9v112_init, ARRAY_SIZE(mt9v112_init)); if (gspca_dev->usb_err < 0) { pr_err("MT9V112 sensor initialization failed\n"); return; } sd->hstart = 6; sd->vstart = 2; sd->sensor = SENSOR_MT9V112; pr_info("MT9V112 sensor detected\n"); return; } gspca_dev->usb_err = -ENODEV; } static void mt9m112_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w2_buf(gspca_dev, mt9m112_init, ARRAY_SIZE(mt9m112_init)); if (gspca_dev->usb_err < 0) pr_err("MT9M112 sensor initialization failed\n"); sd->hstart = 0; sd->vstart = 2; } static void mt9m111_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w2_buf(gspca_dev, mt9m111_init, ARRAY_SIZE(mt9m111_init)); if (gspca_dev->usb_err < 0) pr_err("MT9M111 sensor initialization failed\n"); sd->hstart = 0; sd->vstart = 2; } static void mt9m001_init_sensor(struct 
gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; u16 id; i2c_r2(gspca_dev, 0x00, &id); if (gspca_dev->usb_err < 0) return; /* must be 0x8411 or 0x8421 for a colour sensor and 0x8431 for b&w */ switch (id) { case 0x8411: case 0x8421: pr_info("MT9M001 color sensor detected\n"); break; case 0x8431: pr_info("MT9M001 mono sensor detected\n"); break; default: pr_err("No MT9M001 chip detected, ID = %x\n", id); gspca_dev->usb_err = -ENODEV; return; } i2c_w2_buf(gspca_dev, mt9m001_init, ARRAY_SIZE(mt9m001_init)); if (gspca_dev->usb_err < 0) pr_err("MT9M001 sensor initialization failed\n"); sd->hstart = 1; sd->vstart = 1; } static void hv7131r_init_sensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; i2c_w1_buf(gspca_dev, hv7131r_init, ARRAY_SIZE(hv7131r_init)); if (gspca_dev->usb_err < 0) pr_err("HV7131R sensor initialization failed\n"); sd->hstart = 0; sd->vstart = 1; } static void set_cmatrix(struct gspca_dev *gspca_dev, s32 brightness, s32 contrast, s32 satur, s32 hue) { s32 hue_coord, hue_index = 180 + hue; u8 cmatrix[21]; memset(cmatrix, 0, sizeof(cmatrix)); cmatrix[2] = (contrast * 0x25 / 0x100) + 0x26; cmatrix[0] = 0x13 + (cmatrix[2] - 0x26) * 0x13 / 0x25; cmatrix[4] = 0x07 + (cmatrix[2] - 0x26) * 0x07 / 0x25; cmatrix[18] = brightness - 0x80; hue_coord = (hsv_red_x[hue_index] * satur) >> 8; cmatrix[6] = hue_coord; cmatrix[7] = (hue_coord >> 8) & 0x0f; hue_coord = (hsv_red_y[hue_index] * satur) >> 8; cmatrix[8] = hue_coord; cmatrix[9] = (hue_coord >> 8) & 0x0f; hue_coord = (hsv_green_x[hue_index] * satur) >> 8; cmatrix[10] = hue_coord; cmatrix[11] = (hue_coord >> 8) & 0x0f; hue_coord = (hsv_green_y[hue_index] * satur) >> 8; cmatrix[12] = hue_coord; cmatrix[13] = (hue_coord >> 8) & 0x0f; hue_coord = (hsv_blue_x[hue_index] * satur) >> 8; cmatrix[14] = hue_coord; cmatrix[15] = (hue_coord >> 8) & 0x0f; hue_coord = (hsv_blue_y[hue_index] * satur) >> 8; cmatrix[16] = hue_coord; cmatrix[17] = (hue_coord >> 8) & 0x0f; reg_w(gspca_dev, 0x10e1, cmatrix, 21); } static void set_gamma(struct gspca_dev *gspca_dev, s32 val) { u8 gamma[17]; u8 gval = val * 0xb8 / 0x100; gamma[0] = 0x0a; gamma[1] = 0x13 + (gval * (0xcb - 0x13) / 0xb8); gamma[2] = 0x25 + (gval * (0xee - 0x25) / 0xb8); gamma[3] = 0x37 + (gval * (0xfa - 0x37) / 0xb8); gamma[4] = 0x45 + (gval * (0xfc - 0x45) / 0xb8); gamma[5] = 0x55 + (gval * (0xfb - 0x55) / 0xb8); gamma[6] = 0x65 + (gval * (0xfc - 0x65) / 0xb8); gamma[7] = 0x74 + (gval * (0xfd - 0x74) / 0xb8); gamma[8] = 0x83 + (gval * (0xfe - 0x83) / 0xb8); gamma[9] = 0x92 + (gval * (0xfc - 0x92) / 0xb8); gamma[10] = 0xa1 + (gval * (0xfc - 0xa1) / 0xb8); gamma[11] = 0xb0 + (gval * (0xfc - 0xb0) / 0xb8); gamma[12] = 0xbf + (gval * (0xfb - 0xbf) / 0xb8); gamma[13] = 0xce + (gval * (0xfb - 0xce) / 0xb8); gamma[14] = 0xdf + (gval * (0xfd - 0xdf) / 0xb8); gamma[15] = 0xea + (gval * (0xf9 - 0xea) / 0xb8); gamma[16] = 0xf5; reg_w(gspca_dev, 0x1190, gamma, 17); } static void set_redblue(struct gspca_dev *gspca_dev, s32 blue, s32 red) { reg_w1(gspca_dev, 0x118c, red); reg_w1(gspca_dev, 0x118f, blue); } static void set_hvflip(struct gspca_dev *gspca_dev, s32 hflip, s32 vflip) { u8 value, tslb; u16 value2; struct sd *sd = (struct sd *) gspca_dev; if ((sd->flags & FLIP_DETECT) && dmi_check_system(flip_dmi_table)) { hflip = !hflip; vflip = !vflip; } switch (sd->sensor) { case SENSOR_OV7660: value = 0x01; if (hflip) value |= 0x20; if (vflip) { value |= 0x10; sd->vstart = 2; } else { sd->vstart = 3; } reg_w1(gspca_dev, 0x1182, sd->vstart); i2c_w1(gspca_dev, 0x1e, value);
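/*
 * For the OV7660 the vertical start line depends on the flip state
 * (vstart is 2 when flipped, 3 otherwise), so the bridge window
 * register 0x1182 was reprogrammed above before the sensor's
 * mirror/flip register 0x1e was written.
 */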
break; case SENSOR_OV9650: i2c_r1(gspca_dev, 0x1e, &value); value &= ~0x30; tslb = 0x01; if (hflip) value |= 0x20; if (vflip) { value |= 0x10; tslb = 0x49; } i2c_w1(gspca_dev, 0x1e, value); i2c_w1(gspca_dev, 0x3a, tslb); break; case SENSOR_MT9V111: case SENSOR_MT9V011: i2c_r2(gspca_dev, 0x20, &value2); value2 &= ~0xc0a0; if (hflip) value2 |= 0x8080; if (vflip) value2 |= 0x4020; i2c_w2(gspca_dev, 0x20, value2); break; case SENSOR_MT9M112: case SENSOR_MT9M111: case SENSOR_MT9V112: i2c_r2(gspca_dev, 0x20, &value2); value2 &= ~0x0003; if (hflip) value2 |= 0x0002; if (vflip) value2 |= 0x0001; i2c_w2(gspca_dev, 0x20, value2); break; case SENSOR_HV7131R: i2c_r1(gspca_dev, 0x01, &value); value &= ~0x03; if (vflip) value |= 0x01; if (hflip) value |= 0x02; i2c_w1(gspca_dev, 0x01, value); break; } } static void set_exposure(struct gspca_dev *gspca_dev, s32 expo) { struct sd *sd = (struct sd *) gspca_dev; u8 exp[8] = {sd->i2c_intf, sd->i2c_addr, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10}; int expo2; if (gspca_dev->streaming) exp[7] = 0x1e; switch (sd->sensor) { case SENSOR_OV7660: case SENSOR_OV7670: case SENSOR_OV9655: case SENSOR_OV9650: if (expo > 547) expo2 = 547; else expo2 = expo; exp[0] |= (2 << 4); exp[2] = 0x10; /* AECH */ exp[3] = expo2 >> 2; exp[7] = 0x10; i2c_w(gspca_dev, exp); exp[2] = 0x04; /* COM1 */ exp[3] = expo2 & 0x0003; exp[7] = 0x10; i2c_w(gspca_dev, exp); expo -= expo2; exp[7] = 0x1e; exp[0] |= (3 << 4); exp[2] = 0x2d; /* ADVFL & ADVFH */ exp[3] = expo; exp[4] = expo >> 8; break; case SENSOR_MT9M001: case SENSOR_MT9V112: case SENSOR_MT9V011: exp[0] |= (3 << 4); exp[2] = 0x09; exp[3] = expo >> 8; exp[4] = expo; break; case SENSOR_HV7131R: exp[0] |= (4 << 4); exp[2] = 0x25; exp[3] = expo >> 5; exp[4] = expo << 3; exp[5] = 0; break; default: return; } i2c_w(gspca_dev, exp); } static void set_gain(struct gspca_dev *gspca_dev, s32 g) { struct sd *sd = (struct sd *) gspca_dev; u8 gain[8] = {sd->i2c_intf, sd->i2c_addr, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10}; if (gspca_dev->streaming) gain[7] = 0x15; /* or 1d ? */ switch (sd->sensor) { case SENSOR_OV7660: case SENSOR_OV7670: case SENSOR_SOI968: case SENSOR_OV9655: case SENSOR_OV9650: gain[0] |= (2 << 4); gain[3] = ov_gain[g]; break; case SENSOR_MT9V011: gain[0] |= (3 << 4); gain[2] = 0x35; gain[3] = micron1_gain[g] >> 8; gain[4] = micron1_gain[g]; break; case SENSOR_MT9V112: gain[0] |= (3 << 4); gain[2] = 0x2f; gain[3] = micron1_gain[g] >> 8; gain[4] = micron1_gain[g]; break; case SENSOR_MT9M001: gain[0] |= (3 << 4); gain[2] = 0x2f; gain[3] = micron2_gain[g] >> 8; gain[4] = micron2_gain[g]; break; case SENSOR_HV7131R: gain[0] |= (2 << 4); gain[2] = 0x30; gain[3] = hv7131r_gain[g]; break; default: return; } i2c_w(gspca_dev, gain); } static void set_led_mode(struct gspca_dev *gspca_dev, s32 val) { reg_w1(gspca_dev, 0x1007, 0x60); reg_w1(gspca_dev, 0x1006, val ? 
0x40 : 0x00); } static void set_quality(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; jpeg_set_qual(sd->jpeg_hdr, val); reg_w1(gspca_dev, 0x1061, 0x01); /* stop transfer */ reg_w1(gspca_dev, 0x10e0, sd->fmt | 0x20); /* write QTAB */ reg_w(gspca_dev, 0x1100, &sd->jpeg_hdr[JPEG_QT0_OFFSET], 64); reg_w(gspca_dev, 0x1140, &sd->jpeg_hdr[JPEG_QT1_OFFSET], 64); reg_w1(gspca_dev, 0x1061, 0x03); /* restart transfer */ reg_w1(gspca_dev, 0x10e0, sd->fmt); sd->fmt ^= 0x0c; /* invert QTAB use + write */ reg_w1(gspca_dev, 0x10e0, sd->fmt); } #ifdef CONFIG_VIDEO_ADV_DEBUG static int sd_dbg_g_register(struct gspca_dev *gspca_dev, struct v4l2_dbg_register *reg) { struct sd *sd = (struct sd *) gspca_dev; reg->size = 1; switch (reg->match.addr) { case 0: if (reg->reg < 0x1000 || reg->reg > 0x11ff) return -EINVAL; reg_r(gspca_dev, reg->reg, 1); reg->val = gspca_dev->usb_buf[0]; return gspca_dev->usb_err; case 1: if (sd->sensor >= SENSOR_MT9V011 && sd->sensor <= SENSOR_MT9M112) { i2c_r2(gspca_dev, reg->reg, (u16 *) &reg->val); reg->size = 2; } else { i2c_r1(gspca_dev, reg->reg, (u8 *) &reg->val); } return gspca_dev->usb_err; } return -EINVAL; } static int sd_dbg_s_register(struct gspca_dev *gspca_dev, const struct v4l2_dbg_register *reg) { struct sd *sd = (struct sd *) gspca_dev; switch (reg->match.addr) { case 0: if (reg->reg < 0x1000 || reg->reg > 0x11ff) return -EINVAL; reg_w1(gspca_dev, reg->reg, reg->val); return gspca_dev->usb_err; case 1: if (sd->sensor >= SENSOR_MT9V011 && sd->sensor <= SENSOR_MT9M112) { i2c_w2(gspca_dev, reg->reg, reg->val); } else { i2c_w1(gspca_dev, reg->reg, reg->val); } return gspca_dev->usb_err; } return -EINVAL; } static int sd_chip_info(struct gspca_dev *gspca_dev, struct v4l2_dbg_chip_info *chip) { if (chip->match.addr > 1) return -EINVAL; if (chip->match.addr == 1) strscpy(chip->name, "sensor", sizeof(chip->name)); return 0; } #endif static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; cam = &gspca_dev->cam; cam->needs_full_bandwidth = 1; sd->sensor = id->driver_info >> 8; sd->i2c_addr = id->driver_info; sd->flags = id->driver_info >> 16; sd->i2c_intf = 0x80; /* i2c 100 Kb/s */ switch (sd->sensor) { case SENSOR_MT9M112: case SENSOR_MT9M111: case SENSOR_OV9650: case SENSOR_SOI968: cam->cam_mode = sxga_mode; cam->nmodes = ARRAY_SIZE(sxga_mode); break; case SENSOR_MT9M001: cam->cam_mode = mono_mode; cam->nmodes = ARRAY_SIZE(mono_mode); break; case SENSOR_HV7131R: sd->i2c_intf = 0x81; /* i2c 400 Kb/s */ fallthrough; default: cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); break; } sd->old_step = 0; sd->older_step = 0; sd->exposure_step = 16; INIT_WORK(&sd->work, qual_upd); return 0; } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { /* color control cluster */ case V4L2_CID_BRIGHTNESS: set_cmatrix(gspca_dev, sd->brightness->val, sd->contrast->val, sd->saturation->val, sd->hue->val); break; case V4L2_CID_GAMMA: set_gamma(gspca_dev, ctrl->val); break; /* blue/red balance cluster */ case V4L2_CID_BLUE_BALANCE: set_redblue(gspca_dev, sd->blue->val, sd->red->val); break; /* h/vflip cluster */ case V4L2_CID_HFLIP: set_hvflip(gspca_dev, sd->hflip->val, sd->vflip->val); break; /* standalone exposure control */ case V4L2_CID_EXPOSURE: 
set_exposure(gspca_dev, ctrl->val); break; /* standalone gain control */ case V4L2_CID_GAIN: set_gain(gspca_dev, ctrl->val); break; /* autogain + exposure or gain control cluster */ case V4L2_CID_AUTOGAIN: if (sd->sensor == SENSOR_SOI968) set_gain(gspca_dev, sd->gain->val); else set_exposure(gspca_dev, sd->exposure->val); break; case V4L2_CID_JPEG_COMPRESSION_QUALITY: set_quality(gspca_dev, ctrl->val); break; case V4L2_CID_FLASH_LED_MODE: set_led_mode(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 13); sd->brightness = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 127); sd->contrast = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 127); sd->saturation = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 255, 1, 127); sd->hue = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, -180, 180, 1, 0); sd->gamma = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAMMA, 0, 255, 1, 0x10); sd->blue = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BLUE_BALANCE, 0, 127, 1, 0x28); sd->red = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_RED_BALANCE, 0, 127, 1, 0x28); if (sd->sensor != SENSOR_OV9655 && sd->sensor != SENSOR_SOI968 && sd->sensor != SENSOR_OV7670 && sd->sensor != SENSOR_MT9M001 && sd->sensor != SENSOR_MT9VPRB) { sd->hflip = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); sd->vflip = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); } if (sd->sensor != SENSOR_SOI968 && sd->sensor != SENSOR_MT9VPRB && sd->sensor != SENSOR_MT9M112 && sd->sensor != SENSOR_MT9M111 && sd->sensor != SENSOR_MT9V111) sd->exposure = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_EXPOSURE, 0, 0x1780, 1, 0x33); if (sd->sensor != SENSOR_MT9VPRB && sd->sensor != SENSOR_MT9M112 && sd->sensor != SENSOR_MT9M111 && sd->sensor != SENSOR_MT9V111) { sd->gain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 0, 28, 1, 0); sd->autogain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); } sd->jpegqual = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_JPEG_COMPRESSION_QUALITY, 50, 90, 1, 80); if (sd->flags & HAS_LED_TORCH) sd->led_mode = v4l2_ctrl_new_std_menu(hdl, &sd_ctrl_ops, V4L2_CID_FLASH_LED_MODE, V4L2_FLASH_LED_MODE_TORCH, ~0x5, V4L2_FLASH_LED_MODE_NONE); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } v4l2_ctrl_cluster(4, &sd->brightness); v4l2_ctrl_cluster(2, &sd->blue); if (sd->hflip) v4l2_ctrl_cluster(2, &sd->hflip); if (sd->autogain) { if (sd->sensor == SENSOR_SOI968) /* this sensor doesn't have the exposure control and autogain is clustered with gain instead. This works because sd->exposure == NULL. */ v4l2_ctrl_auto_cluster(3, &sd->autogain, 0, false); else /* Otherwise autogain is clustered with exposure. 
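sd_dqcallback() matches this split: it calls do_autogain() for the
SOI968 and do_autoexposure() for all other sensors with autogain.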
*/ v4l2_ctrl_auto_cluster(2, &sd->autogain, 0, false); } return 0; } static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int i; u8 value; u8 i2c_init[9] = { 0x80, sd->i2c_addr, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 }; for (i = 0; i < ARRAY_SIZE(bridge_init); i++) { value = bridge_init[i][1]; reg_w(gspca_dev, bridge_init[i][0], &value, 1); if (gspca_dev->usb_err < 0) { pr_err("Device initialization failed\n"); return gspca_dev->usb_err; } } if (sd->flags & LED_REVERSE) reg_w1(gspca_dev, 0x1006, 0x00); else reg_w1(gspca_dev, 0x1006, 0x20); reg_w(gspca_dev, 0x10c0, i2c_init, 9); if (gspca_dev->usb_err < 0) { pr_err("Device initialization failed\n"); return gspca_dev->usb_err; } switch (sd->sensor) { case SENSOR_OV9650: ov9650_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("OV9650 sensor detected\n"); break; case SENSOR_OV9655: ov9655_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("OV9655 sensor detected\n"); break; case SENSOR_SOI968: soi968_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("SOI968 sensor detected\n"); break; case SENSOR_OV7660: ov7660_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("OV7660 sensor detected\n"); break; case SENSOR_OV7670: ov7670_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("OV7670 sensor detected\n"); break; case SENSOR_MT9VPRB: mt9v_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("MT9VPRB sensor detected\n"); break; case SENSOR_MT9M111: mt9m111_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("MT9M111 sensor detected\n"); break; case SENSOR_MT9M112: mt9m112_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("MT9M112 sensor detected\n"); break; case SENSOR_MT9M001: mt9m001_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; break; case SENSOR_HV7131R: hv7131r_init_sensor(gspca_dev); if (gspca_dev->usb_err < 0) break; pr_info("HV7131R sensor detected\n"); break; default: pr_err("Unsupported sensor\n"); gspca_dev->usb_err = -ENODEV; } return gspca_dev->usb_err; } static void configure_sensor_output(struct gspca_dev *gspca_dev, int mode) { struct sd *sd = (struct sd *) gspca_dev; u8 value; switch (sd->sensor) { case SENSOR_SOI968: if (mode & MODE_SXGA) { i2c_w1(gspca_dev, 0x17, 0x1d); i2c_w1(gspca_dev, 0x18, 0xbd); i2c_w1(gspca_dev, 0x19, 0x01); i2c_w1(gspca_dev, 0x1a, 0x81); i2c_w1(gspca_dev, 0x12, 0x00); sd->hstart = 140; sd->vstart = 19; } else { i2c_w1(gspca_dev, 0x17, 0x13); i2c_w1(gspca_dev, 0x18, 0x63); i2c_w1(gspca_dev, 0x19, 0x01); i2c_w1(gspca_dev, 0x1a, 0x79); i2c_w1(gspca_dev, 0x12, 0x40); sd->hstart = 60; sd->vstart = 11; } break; case SENSOR_OV9650: if (mode & MODE_SXGA) { i2c_w1(gspca_dev, 0x17, 0x1b); i2c_w1(gspca_dev, 0x18, 0xbc); i2c_w1(gspca_dev, 0x19, 0x01); i2c_w1(gspca_dev, 0x1a, 0x82); i2c_r1(gspca_dev, 0x12, &value); i2c_w1(gspca_dev, 0x12, value & 0x07); } else { i2c_w1(gspca_dev, 0x17, 0x24); i2c_w1(gspca_dev, 0x18, 0xc5); i2c_w1(gspca_dev, 0x19, 0x00); i2c_w1(gspca_dev, 0x1a, 0x3c); i2c_r1(gspca_dev, 0x12, &value); i2c_w1(gspca_dev, 0x12, (value & 0x7) | 0x40); } break; case SENSOR_MT9M112: case SENSOR_MT9M111: if (mode & MODE_SXGA) { i2c_w2(gspca_dev, 0xf0, 0x0002); i2c_w2(gspca_dev, 0xc8, 0x970b); i2c_w2(gspca_dev, 0xf0, 0x0000); } else { i2c_w2(gspca_dev, 0xf0, 0x0002); i2c_w2(gspca_dev, 0xc8, 0x8000); i2c_w2(gspca_dev, 0xf0, 0x0000); } break; } } static int sd_isoc_init(struct gspca_dev *gspca_dev) { struct usb_interface *intf; u32 
flags = gspca_dev->cam.cam_mode[(int)gspca_dev->curr_mode].priv; /* * When using the SN9C20X_I420 fmt the sn9c20x needs more bandwidth * than our regular bandwidth calculations reserve, so we force the * use of a specific altsetting when using the SN9C20X_I420 fmt. */ if (!(flags & (MODE_RAW | MODE_JPEG))) { intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface); if (intf->num_altsetting != 9) { pr_warn("sn9c20x camera with unknown number of alt settings (%d), please report!\n", intf->num_altsetting); gspca_dev->alt = intf->num_altsetting; return 0; } switch (gspca_dev->pixfmt.width) { case 160: /* 160x120 */ gspca_dev->alt = 2; break; case 320: /* 320x240 */ gspca_dev->alt = 6; break; default: /* >= 640x480 */ gspca_dev->alt = 9; break; } } return 0; } #define HW_WIN(mode, hstart, vstart) \ ((const u8 []){hstart, 0, vstart, 0, \ (mode & MODE_SXGA ? 1280 >> 4 : 640 >> 4), \ (mode & MODE_SXGA ? 1024 >> 3 : 480 >> 3)}) #define CLR_WIN(width, height) \ ((const u8 [])\ {0, width >> 2, 0, height >> 1,\ ((width >> 10) & 0x01) | ((height >> 8) & 0x6)}) static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; int width = gspca_dev->pixfmt.width; int height = gspca_dev->pixfmt.height; u8 fmt, scale = 0; jpeg_define(sd->jpeg_hdr, height, width, 0x21); jpeg_set_qual(sd->jpeg_hdr, v4l2_ctrl_g_ctrl(sd->jpegqual)); if (mode & MODE_RAW) fmt = 0x2d; else if (mode & MODE_JPEG) fmt = 0x24; else fmt = 0x2f; /* YUV 420 */ sd->fmt = fmt; switch (mode & SCALE_MASK) { case SCALE_1280x1024: scale = 0xc0; pr_info("Set 1280x1024\n"); break; case SCALE_640x480: scale = 0x80; pr_info("Set 640x480\n"); break; case SCALE_320x240: scale = 0x90; pr_info("Set 320x240\n"); break; case SCALE_160x120: scale = 0xa0; pr_info("Set 160x120\n"); break; } configure_sensor_output(gspca_dev, mode); reg_w(gspca_dev, 0x1100, &sd->jpeg_hdr[JPEG_QT0_OFFSET], 64); reg_w(gspca_dev, 0x1140, &sd->jpeg_hdr[JPEG_QT1_OFFSET], 64); reg_w(gspca_dev, 0x10fb, CLR_WIN(width, height), 5); reg_w(gspca_dev, 0x1180, HW_WIN(mode, sd->hstart, sd->vstart), 6); reg_w1(gspca_dev, 0x1189, scale); reg_w1(gspca_dev, 0x10e0, fmt); set_cmatrix(gspca_dev, v4l2_ctrl_g_ctrl(sd->brightness), v4l2_ctrl_g_ctrl(sd->contrast), v4l2_ctrl_g_ctrl(sd->saturation), v4l2_ctrl_g_ctrl(sd->hue)); set_gamma(gspca_dev, v4l2_ctrl_g_ctrl(sd->gamma)); set_redblue(gspca_dev, v4l2_ctrl_g_ctrl(sd->blue), v4l2_ctrl_g_ctrl(sd->red)); if (sd->gain) set_gain(gspca_dev, v4l2_ctrl_g_ctrl(sd->gain)); if (sd->exposure) set_exposure(gspca_dev, v4l2_ctrl_g_ctrl(sd->exposure)); if (sd->hflip) set_hvflip(gspca_dev, v4l2_ctrl_g_ctrl(sd->hflip), v4l2_ctrl_g_ctrl(sd->vflip)); reg_w1(gspca_dev, 0x1007, 0x20); reg_w1(gspca_dev, 0x1061, 0x03); /* if JPEG, prepare the compression quality update */ if (mode & MODE_JPEG) { sd->pktsz = sd->npkt = 0; sd->nchg = 0; } if (sd->led_mode) v4l2_ctrl_s_ctrl(sd->led_mode, 0); return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { reg_w1(gspca_dev, 0x1007, 0x00); reg_w1(gspca_dev, 0x1061, 0x01); } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; mutex_unlock(&gspca_dev->usb_lock); flush_work(&sd->work); mutex_lock(&gspca_dev->usb_lock); } static void do_autoexposure(struct gspca_dev *gspca_dev, u16 avg_lum) { struct sd *sd = (struct sd *) gspca_dev; s32 cur_exp = 
v4l2_ctrl_g_ctrl(sd->exposure); s32 max = sd->exposure->maximum - sd->exposure_step; s32 min = sd->exposure->minimum + sd->exposure_step; s16 new_exp; /* * some hardcoded values are present * like those for maximal/minimal exposure * and exposure steps */ if (avg_lum < MIN_AVG_LUM) { if (cur_exp > max) return; new_exp = cur_exp + sd->exposure_step; if (new_exp > max) new_exp = max; if (new_exp < min) new_exp = min; v4l2_ctrl_s_ctrl(sd->exposure, new_exp); sd->older_step = sd->old_step; sd->old_step = 1; if (sd->old_step ^ sd->older_step) sd->exposure_step /= 2; else sd->exposure_step += 2; } if (avg_lum > MAX_AVG_LUM) { if (cur_exp < min) return; new_exp = cur_exp - sd->exposure_step; if (new_exp > max) new_exp = max; if (new_exp < min) new_exp = min; v4l2_ctrl_s_ctrl(sd->exposure, new_exp); sd->older_step = sd->old_step; sd->old_step = 0; if (sd->old_step ^ sd->older_step) sd->exposure_step /= 2; else sd->exposure_step += 2; } } static void do_autogain(struct gspca_dev *gspca_dev, u16 avg_lum) { struct sd *sd = (struct sd *) gspca_dev; s32 cur_gain = v4l2_ctrl_g_ctrl(sd->gain); if (avg_lum < MIN_AVG_LUM && cur_gain < sd->gain->maximum) v4l2_ctrl_s_ctrl(sd->gain, cur_gain + 1); if (avg_lum > MAX_AVG_LUM && cur_gain > sd->gain->minimum) v4l2_ctrl_s_ctrl(sd->gain, cur_gain - 1); } static void sd_dqcallback(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int avg_lum; if (sd->autogain == NULL || !v4l2_ctrl_g_ctrl(sd->autogain)) return; avg_lum = atomic_read(&sd->avg_lum); if (sd->sensor == SENSOR_SOI968) do_autogain(gspca_dev, avg_lum); else do_autoexposure(gspca_dev, avg_lum); } /* JPEG quality update */ /* This function is executed from a work queue. */ static void qual_upd(struct work_struct *work) { struct sd *sd = container_of(work, struct sd, work); struct gspca_dev *gspca_dev = &sd->gspca_dev; s32 qual = v4l2_ctrl_g_ctrl(sd->jpegqual); /* To protect gspca_dev->usb_buf and gspca_dev->usb_err */ mutex_lock(&gspca_dev->usb_lock); gspca_dbg(gspca_dev, D_STREAM, "qual_upd %d%%\n", qual); gspca_dev->usb_err = 0; set_quality(gspca_dev, qual); mutex_unlock(&gspca_dev->usb_lock); } #if IS_ENABLED(CONFIG_INPUT) static int sd_int_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* interrupt packet */ int len) /* interrupt packet length */ { struct sd *sd = (struct sd *) gspca_dev; if (!(sd->flags & HAS_NO_BUTTON) && len == 1) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 1); input_sync(gspca_dev->input_dev); input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); return 0; } return -EINVAL; } #endif /* check the JPEG compression */ static void transfer_check(struct gspca_dev *gspca_dev, u8 *data) { struct sd *sd = (struct sd *) gspca_dev; int new_qual, r; new_qual = 0; /* if USB error, discard the frame and decrease the quality */ if (data[6] & 0x08) { /* USB FIFO full */ gspca_dev->last_packet_type = DISCARD_PACKET; new_qual = -5; } else { /* else, compute the filling rate and a new JPEG quality */ r = (sd->pktsz * 100) / (sd->npkt * gspca_dev->urb[0]->iso_frame_desc[0].length); if (r >= 85) new_qual = -3; else if (r < 75) new_qual = 2; } if (new_qual != 0) { sd->nchg += new_qual; if (sd->nchg < -6 || sd->nchg >= 12) { /* Note: we are in interrupt context, so we can't use v4l2_ctrl_g/s_ctrl here. Access the value directly instead. 
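The updated cur.val is applied from process context by qual_upd(),
which is scheduled below via schedule_work() and takes usb_lock
before rewriting the quantization tables.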
*/ s32 curqual = sd->jpegqual->cur.val; sd->nchg = 0; new_qual += curqual; if (new_qual < sd->jpegqual->minimum) new_qual = sd->jpegqual->minimum; else if (new_qual > sd->jpegqual->maximum) new_qual = sd->jpegqual->maximum; if (new_qual != curqual) { sd->jpegqual->cur.val = new_qual; schedule_work(&sd->work); } } } else { sd->nchg = 0; } sd->pktsz = sd->npkt = 0; } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; int avg_lum, is_jpeg; static const u8 frame_header[] = { 0xff, 0xff, 0x00, 0xc4, 0xc4, 0x96 }; is_jpeg = (sd->fmt & 0x03) == 0; if (len >= 64 && memcmp(data, frame_header, 6) == 0) { avg_lum = ((data[35] >> 2) & 3) | (data[20] << 2) | (data[19] << 10); avg_lum += ((data[35] >> 4) & 3) | (data[22] << 2) | (data[21] << 10); avg_lum += ((data[35] >> 6) & 3) | (data[24] << 2) | (data[23] << 10); avg_lum += (data[36] & 3) | (data[26] << 2) | (data[25] << 10); avg_lum += ((data[36] >> 2) & 3) | (data[28] << 2) | (data[27] << 10); avg_lum += ((data[36] >> 4) & 3) | (data[30] << 2) | (data[29] << 10); avg_lum += ((data[36] >> 6) & 3) | (data[32] << 2) | (data[31] << 10); avg_lum += ((data[44] >> 4) & 3) | (data[34] << 2) | (data[33] << 10); avg_lum >>= 9; atomic_set(&sd->avg_lum, avg_lum); if (is_jpeg) transfer_check(gspca_dev, data); gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); len -= 64; if (len == 0) return; data += 64; } if (gspca_dev->last_packet_type == LAST_PACKET) { if (is_jpeg) { gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } else { gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); } } else { /* if JPEG, count the packets and their size */ if (is_jpeg) { sd->npkt++; sd->pktsz += len; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = KBUILD_MODNAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .isoc_init = sd_isoc_init, .start = sd_start, .stopN = sd_stopN, .stop0 = sd_stop0, .pkt_scan = sd_pkt_scan, #if IS_ENABLED(CONFIG_INPUT) .int_pkt_scan = sd_int_pkt_scan, #endif .dq_callback = sd_dqcallback, #ifdef CONFIG_VIDEO_ADV_DEBUG .set_register = sd_dbg_s_register, .get_register = sd_dbg_g_register, .get_chip_info = sd_chip_info, #endif }; #define SN9C20X(sensor, i2c_addr, flags) \ .driver_info = ((flags & 0xff) << 16) \ | (SENSOR_ ## sensor << 8) \ | (i2c_addr) static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0c45, 0x6240), SN9C20X(MT9M001, 0x5d, 0)}, {USB_DEVICE(0x0c45, 0x6242), SN9C20X(MT9M111, 0x5d, HAS_LED_TORCH)}, {USB_DEVICE(0x0c45, 0x6248), SN9C20X(OV9655, 0x30, 0)}, {USB_DEVICE(0x0c45, 0x624c), SN9C20X(MT9M112, 0x5d, 0)}, {USB_DEVICE(0x0c45, 0x624e), SN9C20X(SOI968, 0x30, LED_REVERSE)}, {USB_DEVICE(0x0c45, 0x624f), SN9C20X(OV9650, 0x30, (FLIP_DETECT | HAS_NO_BUTTON))}, {USB_DEVICE(0x0c45, 0x6251), SN9C20X(OV9650, 0x30, 0)}, {USB_DEVICE(0x0c45, 0x6253), SN9C20X(OV9650, 0x30, 0)}, {USB_DEVICE(0x0c45, 0x6260), SN9C20X(OV7670, 0x21, 0)}, {USB_DEVICE(0x0c45, 0x6270), SN9C20X(MT9VPRB, 0x00, 0)}, {USB_DEVICE(0x0c45, 0x627b), SN9C20X(OV7660, 0x21, FLIP_DETECT)}, {USB_DEVICE(0x0c45, 0x627c), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0x0c45, 0x627f), SN9C20X(OV9650, 0x30, 0)}, {USB_DEVICE(0x0c45, 0x6280), SN9C20X(MT9M001, 0x5d, 0)}, {USB_DEVICE(0x0c45, 0x6282), SN9C20X(MT9M111, 0x5d, 0)}, {USB_DEVICE(0x0c45, 0x6288), SN9C20X(OV9655, 0x30, 0)}, {USB_DEVICE(0x0c45, 
0x628c), SN9C20X(MT9M112, 0x5d, 0)}, {USB_DEVICE(0x0c45, 0x628e), SN9C20X(SOI968, 0x30, 0)}, {USB_DEVICE(0x0c45, 0x628f), SN9C20X(OV9650, 0x30, 0)}, {USB_DEVICE(0x0c45, 0x62a0), SN9C20X(OV7670, 0x21, 0)}, {USB_DEVICE(0x0c45, 0x62b0), SN9C20X(MT9VPRB, 0x00, 0)}, {USB_DEVICE(0x0c45, 0x62b3), SN9C20X(OV9655, 0x30, LED_REVERSE)}, {USB_DEVICE(0x0c45, 0x62bb), SN9C20X(OV7660, 0x21, LED_REVERSE)}, {USB_DEVICE(0x0c45, 0x62bc), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0x045e, 0x00f4), SN9C20X(OV9650, 0x30, 0)}, {USB_DEVICE(0x145f, 0x013d), SN9C20X(OV7660, 0x21, 0)}, {USB_DEVICE(0x0458, 0x7029), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0x0458, 0x7045), SN9C20X(MT9M112, 0x5d, LED_REVERSE)}, {USB_DEVICE(0x0458, 0x704a), SN9C20X(MT9M112, 0x5d, 0)}, {USB_DEVICE(0x0458, 0x704c), SN9C20X(MT9M112, 0x5d, 0)}, {USB_DEVICE(0xa168, 0x0610), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0xa168, 0x0611), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0xa168, 0x0613), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0xa168, 0x0618), SN9C20X(HV7131R, 0x11, 0)}, {USB_DEVICE(0xa168, 0x0614), SN9C20X(MT9M111, 0x5d, 0)}, {USB_DEVICE(0xa168, 0x0615), SN9C20X(MT9M111, 0x5d, 0)}, {USB_DEVICE(0xa168, 0x0617), SN9C20X(MT9M111, 0x5d, 0)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = KBUILD_MODNAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
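/*
 * Worked example of the packing done by the SN9C20X() macro above
 * (the flag bit values themselves are defined elsewhere in this file):
 *
 *   SN9C20X(SOI968, 0x30, LED_REVERSE)
 *     => .driver_info = ((LED_REVERSE & 0xff) << 16)
 *                     | (SENSOR_SOI968 << 8) | 0x30
 *
 * which sd_config() unpacks as:
 *
 *   sd->sensor   = id->driver_info >> 8;   truncated to the field width
 *   sd->i2c_addr = id->driver_info;        low byte, here 0x30
 *   sd->flags    = id->driver_info >> 16;  here LED_REVERSE
 */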
// SPDX-License-Identifier: GPL-2.0-only /* * VLAN netlink control interface * * Copyright (c) 2007 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/module.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include "vlan.h" static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { [IFLA_VLAN_ID] = { .type = NLA_U16 }, [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, [IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { [IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) }, }; static inline int vlan_validate_qos_map(struct nlattr *attr) { if (!attr) return 0; return nla_validate_nested_deprecated(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy, NULL); } static int vlan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ifla_vlan_flags *flags; u16 id; int err; if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { NL_SET_ERR_MSG_MOD(extack, "Invalid link address"); return -EINVAL; } if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { NL_SET_ERR_MSG_MOD(extack, "Invalid link address"); return -EADDRNOTAVAIL; } } if (!data) { NL_SET_ERR_MSG_MOD(extack, "VLAN properties not specified"); return -EINVAL; } if (data[IFLA_VLAN_PROTOCOL]) { switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): break; default: NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN protocol"); return -EPROTONOSUPPORT; } } if (data[IFLA_VLAN_ID]) { id = nla_get_u16(data[IFLA_VLAN_ID]); if (id >= VLAN_VID_MASK) { NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN id"); return -ERANGE; } } if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP | VLAN_FLAG_BRIDGE_BINDING)) { NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN flags"); return -EINVAL; } } err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]); if (err < 0) {
NL_SET_ERR_MSG_MOD(extack, "Invalid ingress QOS map"); return err; } err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]); if (err < 0) { NL_SET_ERR_MSG_MOD(extack, "Invalid egress QOS map"); return err; } return 0; } static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; int rem, err; if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); err = vlan_dev_change_flags(dev, flags->flags, flags->mask); if (err) return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING, data[IFLA_VLAN_INGRESS_QOS], rem) { m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING, data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) return err; } } return 0; } static int vlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev; unsigned int max_mtu; __be16 proto; int err; if (!data[IFLA_VLAN_ID]) { NL_SET_ERR_MSG_MOD(extack, "VLAN id not specified"); return -EINVAL; } if (!tb[IFLA_LINK]) { NL_SET_ERR_MSG_MOD(extack, "link not specified"); return -EINVAL; } real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) { NL_SET_ERR_MSG_MOD(extack, "link does not exist"); return -ENODEV; } proto = nla_get_be16_default(data[IFLA_VLAN_PROTOCOL], htons(ETH_P_8021Q)); vlan->vlan_proto = proto; vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); vlan->real_dev = real_dev; dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE); vlan->flags = VLAN_FLAG_REORDER_HDR; err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id, extack); if (err < 0) return err; max_mtu = netif_reduces_vlan_mtu(real_dev) ? real_dev->mtu - VLAN_HLEN : real_dev->mtu; if (!tb[IFLA_MTU]) dev->mtu = max_mtu; else if (dev->mtu > max_mtu) return -EINVAL; /* Note: If this initial vlan_changelink() fails, we need * to call vlan_dev_free_egress_priority() to free memory. 
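 * Only the egress mappings need this: they are individually allocated
 * vlan_priority_tci_mapping entries (vlan_fill_info() below walks the
 * pm->next chain), while the ingress map is a fixed-size array inside
 * vlan_dev_priv.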
*/ err = vlan_changelink(dev, tb, data, extack); if (!err) err = register_vlan_dev(dev, extack); if (err) vlan_dev_free_egress_priority(dev); return err; } static inline size_t vlan_qos_map_size(unsigned int n) { if (n == 0) return 0; /* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */ return nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n; } static size_t vlan_get_size(const struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ nla_total_size(2) + /* IFLA_VLAN_ID */ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_priority_tci_mapping *pm; struct ifla_vlan_flags f; struct ifla_vlan_qos_mapping m; struct nlattr *nest; unsigned int i; if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) || nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id)) goto nla_put_failure; if (vlan->flags) { f.flags = vlan->flags; f.mask = ~0; if (nla_put(skb, IFLA_VLAN_FLAGS, sizeof(f), &f)) goto nla_put_failure; } if (vlan->nr_ingress_mappings) { nest = nla_nest_start_noflag(skb, IFLA_VLAN_INGRESS_QOS); if (nest == NULL) goto nla_put_failure; for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) { if (!vlan->ingress_priority_map[i]) continue; m.from = i; m.to = vlan->ingress_priority_map[i]; if (nla_put(skb, IFLA_VLAN_QOS_MAPPING, sizeof(m), &m)) goto nla_put_failure; } nla_nest_end(skb, nest); } if (vlan->nr_egress_mappings) { nest = nla_nest_start_noflag(skb, IFLA_VLAN_EGRESS_QOS); if (nest == NULL) goto nla_put_failure; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { for (pm = vlan->egress_priority_map[i]; pm; pm = pm->next) { if (!pm->vlan_qos) continue; m.from = pm->priority; m.to = (pm->vlan_qos >> 13) & 0x7; if (nla_put(skb, IFLA_VLAN_QOS_MAPPING, sizeof(m), &m)) goto nla_put_failure; } } nla_nest_end(skb, nest); } return 0; nla_put_failure: return -EMSGSIZE; } static struct net *vlan_get_link_net(const struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; return dev_net(real_dev); } struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, .policy = vlan_policy, .priv_size = sizeof(struct vlan_dev_priv), .setup = vlan_setup, .validate = vlan_validate, .newlink = vlan_newlink, .changelink = vlan_changelink, .dellink = unregister_vlan_dev, .get_size = vlan_get_size, .fill_info = vlan_fill_info, .get_link_net = vlan_get_link_net, }; int __init vlan_netlink_init(void) { return rtnl_link_register(&vlan_link_ops); } void __exit vlan_netlink_fini(void) { rtnl_link_unregister(&vlan_link_ops); } MODULE_ALIAS_RTNL_LINK("vlan");
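/*
 * Example of driving this interface from userspace with iproute2
 * (illustrative; "eth0" is a placeholder device name):
 *
 *   ip link add link eth0 name eth0.100 type vlan id 100 \
 *           egress-qos-map 3:4
 *
 * iproute2 sends an RTM_NEWLINK request whose IFLA_LINKINFO nest
 * carries IFLA_VLAN_ID and IFLA_VLAN_EGRESS_QOS; these are checked by
 * vlan_validate() and applied by vlan_newlink()/vlan_changelink().
 */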
// SPDX-License-Identifier: GPL-2.0-only /* * spectrum management * * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <net/cfg80211.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "sta_info.h" #include "wme.h" static bool wbcs_elem_to_chandef(const struct ieee80211_wide_bw_chansw_ie *wbcs_elem, struct cfg80211_chan_def *chandef) { u8 ccfs0 = wbcs_elem->new_center_freq_seg0; u8 ccfs1 = wbcs_elem->new_center_freq_seg1; u32 cf0 = ieee80211_channel_to_frequency(ccfs0, chandef->chan->band); u32 cf1 = ieee80211_channel_to_frequency(ccfs1, chandef->chan->band); switch (wbcs_elem->new_channel_width) { case IEEE80211_VHT_CHANWIDTH_160MHZ: /* deprecated encoding */ chandef->width = NL80211_CHAN_WIDTH_160; chandef->center_freq1 = cf0; break; case IEEE80211_VHT_CHANWIDTH_80P80MHZ: /* deprecated encoding */ chandef->width = NL80211_CHAN_WIDTH_80P80; chandef->center_freq1 = cf0; chandef->center_freq2 = cf1; break; case IEEE80211_VHT_CHANWIDTH_80MHZ: chandef->width = NL80211_CHAN_WIDTH_80; chandef->center_freq1 = cf0; if (ccfs1) { u8 diff = abs(ccfs0 - ccfs1); if (diff == 8) { chandef->width = NL80211_CHAN_WIDTH_160; chandef->center_freq1 = cf1; } else if (diff > 8) { chandef->width = NL80211_CHAN_WIDTH_80P80; chandef->center_freq2 = cf1; } } break; case IEEE80211_VHT_CHANWIDTH_USE_HT: default: /* If the WBCS Element is present, new channel bandwidth is * at least 40 MHz. */ chandef->width = NL80211_CHAN_WIDTH_40; chandef->center_freq1 = cf0; break; } return cfg80211_chandef_valid(chandef); } static void validate_chandef_by_ht_vht_oper(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, u32 vht_cap_info, struct cfg80211_chan_def *chandef) { u32 control_freq, center_freq1, center_freq2; enum nl80211_chan_width chan_width; struct ieee80211_ht_operation ht_oper; struct ieee80211_vht_operation vht_oper; if (conn->mode < IEEE80211_CONN_MODE_HT || conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { chandef->chan = NULL; return; } control_freq = chandef->chan->center_freq; center_freq1 = chandef->center_freq1; center_freq2 = chandef->center_freq2; chan_width = chandef->width; ht_oper.primary_chan = ieee80211_frequency_to_channel(control_freq); if (control_freq != center_freq1) ht_oper.ht_param = control_freq > center_freq1 ? IEEE80211_HT_PARAM_CHA_SEC_BELOW : IEEE80211_HT_PARAM_CHA_SEC_ABOVE; else ht_oper.ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE; ieee80211_chandef_ht_oper(&ht_oper, chandef); if (conn->mode < IEEE80211_CONN_MODE_VHT) return; vht_oper.center_freq_seg0_idx = ieee80211_frequency_to_channel(center_freq1); vht_oper.center_freq_seg1_idx = center_freq2 ? ieee80211_frequency_to_channel(center_freq2) : 0; switch (chan_width) { case NL80211_CHAN_WIDTH_320: WARN_ON(1); break; case NL80211_CHAN_WIDTH_160: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; vht_oper.center_freq_seg1_idx = vht_oper.center_freq_seg0_idx; vht_oper.center_freq_seg0_idx += control_freq < center_freq1 ? 
-8 : 8; break; case NL80211_CHAN_WIDTH_80P80: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; break; case NL80211_CHAN_WIDTH_80: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; break; default: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT; break; } ht_oper.operation_mode = le16_encode_bits(vht_oper.center_freq_seg1_idx, IEEE80211_HT_OP_MODE_CCFS2_MASK); if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, &vht_oper, &ht_oper, chandef)) chandef->chan = NULL; } static void validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = sdata->local; u32 control_freq, center_freq1, center_freq2; enum nl80211_chan_width chan_width; struct { struct ieee80211_he_operation _oper; struct ieee80211_he_6ghz_oper _6ghz_oper; } __packed he; struct { struct ieee80211_eht_operation _oper; struct ieee80211_eht_operation_info _oper_info; } __packed eht; const struct ieee80211_eht_operation *eht_oper; if (conn->mode < IEEE80211_CONN_MODE_HE) { chandef->chan = NULL; return; } control_freq = chandef->chan->center_freq; center_freq1 = chandef->center_freq1; center_freq2 = chandef->center_freq2; chan_width = chandef->width; he._oper.he_oper_params = le32_encode_bits(1, IEEE80211_HE_OPERATION_6GHZ_OP_INFO); he._6ghz_oper.primary = ieee80211_frequency_to_channel(control_freq); he._6ghz_oper.ccfs0 = ieee80211_frequency_to_channel(center_freq1); he._6ghz_oper.ccfs1 = center_freq2 ? ieee80211_frequency_to_channel(center_freq2) : 0; switch (chan_width) { case NL80211_CHAN_WIDTH_320: he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -16 : 16; he._6ghz_oper.control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ; break; case NL80211_CHAN_WIDTH_160: he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? 
-8 : 8; fallthrough; case NL80211_CHAN_WIDTH_80P80: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; break; case NL80211_CHAN_WIDTH_80: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ; break; case NL80211_CHAN_WIDTH_40: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ; break; default: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ; break; } if (conn->mode < IEEE80211_CONN_MODE_EHT) { eht_oper = NULL; } else { eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT; eht._oper_info.control = he._6ghz_oper.control; eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0; eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1; eht_oper = &eht._oper; } if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper, eht_oper, chandef)) chandef->chan = NULL; } int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, u32 vht_cap_info, struct ieee80211_conn_settings *conn, u8 *bssid, bool unprot_action, struct ieee80211_csa_ie *csa_ie) { enum nl80211_band new_band = current_band; int new_freq; u8 new_chan_no = 0, new_op_class = 0; struct ieee80211_channel *new_chan; struct cfg80211_chan_def new_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const struct ieee80211_bandwidth_indication *bwi; const struct ieee80211_ext_chansw_ie *ext_chansw_elem; int secondary_channel_offset = -1; memset(csa_ie, 0, sizeof(*csa_ie)); sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; bwi = elems->bandwidth_indication; ext_chansw_elem = elems->ext_chansw_ie; if (conn->mode < IEEE80211_CONN_MODE_HT || conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; } if (conn->mode < IEEE80211_CONN_MODE_VHT) wide_bw_chansw_ie = NULL; if (ext_chansw_elem) { new_op_class = ext_chansw_elem->new_operating_class; if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) { new_op_class = 0; if (!unprot_action) sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n", ext_chansw_elem->new_operating_class); } else { new_chan_no = ext_chansw_elem->new_ch_num; csa_ie->count = ext_chansw_elem->count; csa_ie->mode = ext_chansw_elem->mode; } } if (!new_op_class && elems->ch_switch_ie) { new_chan_no = elems->ch_switch_ie->new_ch_num; csa_ie->count = elems->ch_switch_ie->count; csa_ie->mode = elems->ch_switch_ie->mode; } /* nothing here we understand */ if (!new_chan_no) return 1; /* Mesh Channel Switch Parameters Element */ if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; csa_ie->pre_value = le16_to_cpu( elems->mesh_chansw_params_ie->mesh_pre_value); if (elems->mesh_chansw_params_ie->mesh_flags & WLAN_EID_CHAN_SWITCH_PARAM_REASON) csa_ie->reason_code = le16_to_cpu( elems->mesh_chansw_params_ie->mesh_reason); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) { if (!unprot_action) sdata_info(sdata, "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n", bssid, new_freq); return -EINVAL; } if (sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; } else if (conn->mode >= IEEE80211_CONN_MODE_HT) { /* If the secondary channel offset IE is not present, * we can't know what's the 
post-CSA offset, so the * best we can do is use 20MHz. */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } switch (secondary_channel_offset) { default: /* secondary_channel_offset was present but is invalid */ case IEEE80211_HT_PARAM_CHA_SEC_NONE: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT20); break; case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT40PLUS); break; case IEEE80211_HT_PARAM_CHA_SEC_BELOW: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT40MINUS); break; case -1: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_NO_HT); /* keep width for 5/10 MHz channels */ switch (sdata->vif.bss_conf.chanreq.oper.width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: csa_ie->chanreq.oper.width = sdata->vif.bss_conf.chanreq.oper.width; break; default: break; } break; } /* capture the AP configuration */ csa_ie->chanreq.ap = csa_ie->chanreq.oper; /* parse one of the Elements to build a new chandef */ memset(&new_chandef, 0, sizeof(new_chandef)); new_chandef.chan = new_chan; if (bwi) { /* start with the CSA one */ new_chandef = csa_ie->chanreq.oper; /* and update the width accordingly */ ieee80211_chandef_eht_oper(&bwi->info, &new_chandef); if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT) new_chandef.punctured = get_unaligned_le16(bwi->info.optional); } else if (!wide_bw_chansw_ie || !wbcs_elem_to_chandef(wide_bw_chansw_ie, &new_chandef)) { if (!ieee80211_operating_class_to_chandef(new_op_class, new_chan, &new_chandef)) new_chandef = csa_ie->chanreq.oper; } /* check if the new chandef fits the capabilities */ if (new_band == NL80211_BAND_6GHZ) validate_chandef_by_6ghz_he_eht_oper(sdata, conn, &new_chandef); else validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info, &new_chandef); /* if data is there validate the bandwidth & use it */ if (new_chandef.chan) { /* capture the AP chandef before (potential) downgrading */ csa_ie->chanreq.ap = new_chandef; while (conn->bw_limit < ieee80211_min_bw_limit_from_chandef(&new_chandef)) ieee80211_chandef_downgrade(&new_chandef, NULL); if (!cfg80211_chandef_compatible(&new_chandef, &csa_ie->chanreq.oper)) { sdata_info(sdata, "BSS %pM: CSA has inconsistent channel data, disconnecting\n", bssid); return -EINVAL; } csa_ie->chanreq.oper = new_chandef; } if (elems->max_channel_switch_time) csa_ie->max_switch_time = (elems->max_channel_switch_time[0] << 0) | (elems->max_channel_switch_time[1] << 8) | (elems->max_channel_switch_time[2] << 16); return 0; } static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata, struct ieee80211_msrment_ie *request_ie, const u8 *da, const u8 *bssid, u8 dialog_token) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *msr_report; skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + sizeof(struct ieee80211_msrment_ie)); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); msr_report = skb_put_zero(skb, 24); memcpy(msr_report->da, da, ETH_ALEN); memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN); memcpy(msr_report->bssid, bssid, ETH_ALEN); msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; msr_report->u.action.u.measurement.action_code = WLAN_ACTION_SPCT_MSR_RPRT; msr_report->u.action.u.measurement.dialog_token = 
dialog_token; msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; msr_report->u.action.u.measurement.length = sizeof(struct ieee80211_msrment_ie); memset(&msr_report->u.action.u.measurement.msr_elem, 0, sizeof(struct ieee80211_msrment_ie)); msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; msr_report->u.action.u.measurement.msr_elem.mode |= IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; ieee80211_tx_skb(sdata, skb); } void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { /* * Ignoring a measurement request is a spec violation. * Mandatory measurements must be reported; optional * measurements might be refused or reported as incapable. * For now, just refuse. * TODO: Answer basic measurement as unmeasured */ ieee80211_send_refuse_measurement_request(sdata, &mgmt->u.action.u.measurement.msr_elem, mgmt->sa, mgmt->bssid, mgmt->u.action.u.measurement.dialog_token); }
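/*
 * Editor's illustrative sketch (not part of the original file): a minimal
 * example of how a caller might drive ieee80211_parse_ch_switch_ie() after
 * element parsing. The helper name csa_handle_sketch() and the surrounding
 * flow are assumptions for illustration; only the
 * ieee80211_parse_ch_switch_ie() signature and return convention are taken
 * from the code above.
 */
static int csa_handle_sketch(struct ieee80211_sub_if_data *sdata,
			     struct ieee802_11_elems *elems,
			     enum nl80211_band band, u32 vht_cap_info,
			     struct ieee80211_conn_settings *conn, u8 *bssid)
{
	struct ieee80211_csa_ie csa_ie;
	int res;

	/* Returns 1 when no usable (E)CSA element was found, < 0 on error. */
	res = ieee80211_parse_ch_switch_ie(sdata, elems, band, vht_cap_info,
					   conn, bssid, false, &csa_ie);
	if (res < 0)
		return res;	/* inconsistent/unsupported channel: disconnect */
	if (res)
		return 0;	/* nothing understood: ignore the frame */

	/* csa_ie.chanreq.oper now holds the validated post-switch chandef. */
	return 0;
}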
/* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _LINUX_OF_H #define _LINUX_OF_H /* * Definitions for talking to the Open Firmware PROM on * Power Macintosh and other computers. * * Copyright (C) 1996-2005 Paul Mackerras. * * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp. * Updates for SPARC64 by David S. Miller * Derived from PowerPC and Sparc prom.h files by Stephen Rothwell, IBM Corp. */ #include <linux/types.h> #include <linux/bitops.h> #include <linux/cleanup.h> #include <linux/errno.h> #include <linux/kobject.h> #include <linux/mod_devicetable.h> #include <linux/property.h> #include <linux/list.h> #include <asm/byteorder.h> typedef u32 phandle; typedef u32 ihandle; struct property { char *name; int length; void *value; struct property *next; #if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC) unsigned long _flags; #endif #if defined(CONFIG_OF_PROMTREE) unsigned int unique_id; #endif #if defined(CONFIG_OF_KOBJ) struct bin_attribute attr; #endif }; #if defined(CONFIG_SPARC) struct of_irq_controller; #endif struct device_node { const char *name; phandle phandle; const char *full_name; struct fwnode_handle fwnode; struct property *properties; struct property *deadprops; /* removed properties */ struct device_node *parent; struct device_node *child; struct device_node *sibling; #if defined(CONFIG_OF_KOBJ) struct kobject kobj; #endif unsigned long _flags; void *data; #if defined(CONFIG_SPARC) unsigned int unique_id; struct of_irq_controller *irq_trans; #endif }; #define MAX_PHANDLE_ARGS 16 struct of_phandle_args { struct device_node *np; int args_count; uint32_t args[MAX_PHANDLE_ARGS]; }; struct of_phandle_iterator { /* Common iterator information */ const char *cells_name; int cell_count; const struct device_node *parent; /* List size information */ const __be32 *list_end; const __be32 *phandle_end; /* Current position state */ const __be32 *cur; uint32_t cur_count; phandle phandle; struct device_node *node; }; struct of_reconfig_data { struct device_node *dn; struct property *prop; struct property *old_prop; }; extern const struct kobj_type of_node_ktype; extern const struct fwnode_operations of_fwnode_ops; /** * of_node_init - initialize a devicetree node * @node: Pointer to device node that has been created by kzalloc() * * On return the device_node refcount is set to one. Use of_node_put() * on @node when done to free the memory allocated for it. If the node * is NOT a dynamic node the memory will not be freed. The decision of * whether to free the memory will be done by node->release(), which is * of_node_release(). 
*/ static inline void of_node_init(struct device_node *node) { #if defined(CONFIG_OF_KOBJ) kobject_init(&node->kobj, &of_node_ktype); #endif fwnode_init(&node->fwnode, &of_fwnode_ops); } #if defined(CONFIG_OF_KOBJ) #define of_node_kobj(n) (&(n)->kobj) #else #define of_node_kobj(n) NULL #endif #ifdef CONFIG_OF_DYNAMIC extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); #else /* CONFIG_OF_DYNAMIC */ /* Dummy ref counting routines - to be implemented later */ static inline struct device_node *of_node_get(struct device_node *node) { return node; } static inline void of_node_put(struct device_node *node) { } #endif /* !CONFIG_OF_DYNAMIC */ DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T)) /* Pointer for first entry in chain of all nodes. */ extern struct device_node *of_root; extern struct device_node *of_chosen; extern struct device_node *of_aliases; extern struct device_node *of_stdout; /* * struct device_node flag descriptions * (need to be visible even when !CONFIG_OF) */ #define OF_DYNAMIC 1 /* (and properties) allocated via kmalloc */ #define OF_DETACHED 2 /* detached from the device tree */ #define OF_POPULATED 3 /* device already created */ #define OF_POPULATED_BUS 4 /* platform bus created for children */ #define OF_OVERLAY 5 /* allocated for an overlay */ #define OF_OVERLAY_FREE_CSET 6 /* in overlay cset being freed */ #define OF_BAD_ADDR ((u64)-1) #ifdef CONFIG_OF void of_core_init(void); static inline bool is_of_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &of_fwnode_ops; } #define to_of_node(__fwnode) \ ({ \ typeof(__fwnode) __to_of_node_fwnode = (__fwnode); \ \ is_of_node(__to_of_node_fwnode) ? \ container_of(__to_of_node_fwnode, \ struct device_node, fwnode) : \ NULL; \ }) #define of_fwnode_handle(node) \ ({ \ typeof(node) __of_fwnode_handle_node = (node); \ \ __of_fwnode_handle_node ? 
\ &__of_fwnode_handle_node->fwnode : NULL; \ }) static inline bool of_node_is_root(const struct device_node *node) { return node && (node->parent == NULL); } static inline int of_node_check_flag(const struct device_node *n, unsigned long flag) { return test_bit(flag, &n->_flags); } static inline int of_node_test_and_set_flag(struct device_node *n, unsigned long flag) { return test_and_set_bit(flag, &n->_flags); } static inline void of_node_set_flag(struct device_node *n, unsigned long flag) { set_bit(flag, &n->_flags); } static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) { clear_bit(flag, &n->_flags); } #if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC) static inline int of_property_check_flag(const struct property *p, unsigned long flag) { return test_bit(flag, &p->_flags); } static inline void of_property_set_flag(struct property *p, unsigned long flag) { set_bit(flag, &p->_flags); } static inline void of_property_clear_flag(struct property *p, unsigned long flag) { clear_bit(flag, &p->_flags); } #endif extern struct device_node *__of_find_all_nodes(struct device_node *prev); extern struct device_node *of_find_all_nodes(struct device_node *prev); /* * OF address retrieval & translation */ /* Helper to read a big number; size is in cells (not bytes) */ static inline u64 of_read_number(const __be32 *cell, int size) { u64 r = 0; for (; size--; cell++) r = (r << 32) | be32_to_cpu(*cell); return r; } /* Like of_read_number, but we want an unsigned long result */ static inline unsigned long of_read_ulong(const __be32 *cell, int size) { /* toss away upper bits if unsigned long is smaller than u64 */ return of_read_number(cell, size); } #if defined(CONFIG_SPARC) #include <asm/prom.h> #endif #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) extern bool of_node_name_eq(const struct device_node *np, const char *name); extern bool of_node_name_prefix(const struct device_node *np, const char *prefix); static inline const char *of_node_full_name(const struct device_node *np) { return np ? 
np->full_name : "<no-node>"; } #define for_each_of_allnodes_from(from, dn) \ for (dn = __of_find_all_nodes(from); dn; dn = __of_find_all_nodes(dn)) #define for_each_of_allnodes(dn) for_each_of_allnodes_from(NULL, dn) extern struct device_node *of_find_node_by_name(struct device_node *from, const char *name); extern struct device_node *of_find_node_by_type(struct device_node *from, const char *type); extern struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compat); extern struct device_node *of_find_matching_node_and_match( struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match); extern struct device_node *of_find_node_opts_by_path(const char *path, const char **opts); static inline struct device_node *of_find_node_by_path(const char *path) { return of_find_node_opts_by_path(path, NULL); } extern struct device_node *of_find_node_by_phandle(phandle handle); extern struct device_node *of_get_parent(const struct device_node *node); extern struct device_node *of_get_next_parent(struct device_node *node); extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_child_with_prefix(const struct device_node *node, struct device_node *prev, const char *prefix); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_reserved_child( const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); extern int of_find_last_cache_level(unsigned int cpu); extern struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name); extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); extern bool of_property_read_bool(const struct device_node *np, const char *propname); extern int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size); extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); extern int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value); extern int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value); extern int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string); extern int of_property_match_string(const struct device_node *np, const char *propname, const char *string); extern int of_property_read_string_helper(const struct device_node 
*np, const char *propname, const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_compatible_match(const struct device_node *device, const char *const *compat); extern bool of_device_is_available(const struct device_node *device); extern bool of_device_is_big_endian(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_cpu_device_node_get(int cpu); extern int of_cpu_node_to_id(struct device_node *np); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); extern struct device_node *of_get_cpu_state_node(const struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( const struct of_device_id *matches, const struct device_node *node); extern const void *of_device_get_match_data(const struct device *dev); extern int of_alias_from_compatible(const struct device_node *node, char *alias, int len); extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args); extern int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args); extern int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args); extern int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name); /* module functions */ extern ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len); extern int of_request_module(const struct device_node *np); /* phandle iterator functions */ extern int of_phandle_iterator_init(struct of_phandle_iterator *it, const struct device_node *np, const char *list_name, const char *cells_name, int cell_count); extern int of_phandle_iterator_next(struct of_phandle_iterator *it); extern int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size); extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); bool of_machine_compatible_match(const char *const *compats); /** * of_machine_is_compatible - Test root of device tree for a given compatible value * @compat: compatible string to look for in root node's compatible property. * * Return: true if the root node has the given value in its compatible property. 
*/ static inline bool of_machine_is_compatible(const char *compat) { const char *compats[] = { compat, NULL }; return of_machine_compatible_match(compats); } extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); extern int of_update_property(struct device_node *np, struct property *newprop); /* For updating the device tree at runtime */ #define OF_RECONFIG_ATTACH_NODE 0x0001 #define OF_RECONFIG_DETACH_NODE 0x0002 #define OF_RECONFIG_ADD_PROPERTY 0x0003 #define OF_RECONFIG_REMOVE_PROPERTY 0x0004 #define OF_RECONFIG_UPDATE_PROPERTY 0x0005 extern int of_attach_node(struct device_node *); extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) /* * u32 u; * * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu); /* * struct property *prop; * const char *s; * * of_property_for_each_string(np, "propname", prop, s) * printk("String value: %s\n", s); */ const char *of_prop_next_string(const struct property *prop, const char *cur); bool of_console_check(const struct device_node *dn, char *name, int index); int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); phys_addr_t of_dma_get_max_cpu_address(struct device_node *np); struct kimage; void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline, size_t extra_fdt_size); #else /* CONFIG_OF */ static inline void of_core_init(void) { } static inline bool is_of_node(const struct fwnode_handle *fwnode) { return false; } static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode) { return NULL; } static inline bool of_node_name_eq(const struct device_node *np, const char *name) { return false; } static inline bool of_node_name_prefix(const struct device_node *np, const char *prefix) { return false; } static inline const char* of_node_full_name(const struct device_node *np) { return "<no-node>"; } static inline struct device_node *of_find_node_by_name(struct device_node *from, const char *name) { return NULL; } static inline struct device_node *of_find_node_by_type(struct device_node *from, const char *type) { return NULL; } static inline struct device_node *of_find_matching_node_and_match( struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match) { return NULL; } static inline struct device_node *of_find_node_by_path(const char *path) { return NULL; } static inline struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { return NULL; } static inline struct device_node *of_find_node_by_phandle(phandle handle) { return NULL; } static inline struct device_node *of_get_parent(const struct device_node *node) { return NULL; } static inline struct device_node *of_get_next_parent(struct device_node *node) { return NULL; } static inline struct device_node *of_get_next_child( const struct device_node *node, struct device_node *prev) { return NULL; } static inline struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev) { return NULL; } static inline struct device_node *of_get_next_reserved_child( const struct device_node *node, struct device_node *prev) { return NULL; } static inline struct device_node 
*of_find_node_with_property( struct device_node *from, const char *prop_name) { return NULL; } #define of_fwnode_handle(node) NULL static inline struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { return NULL; } static inline struct device_node *of_get_child_by_name( const struct device_node *node, const char *name) { return NULL; } static inline int of_device_is_compatible(const struct device_node *device, const char *name) { return 0; } static inline int of_device_compatible_match(const struct device_node *device, const char *const *compat) { return 0; } static inline bool of_device_is_available(const struct device_node *device) { return false; } static inline bool of_device_is_big_endian(const struct device_node *device) { return false; } static inline struct property *of_find_property(const struct device_node *np, const char *name, int *lenp) { return NULL; } static inline struct device_node *of_find_compatible_node( struct device_node *from, const char *type, const char *compat) { return NULL; } static inline bool of_property_read_bool(const struct device_node *np, const char *propname) { return false; } static inline int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { return -ENOSYS; } static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { return -ENOSYS; } static inline int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value) { return -ENOSYS; } static inline const void *of_get_property(const struct device_node *node, const char *name, int *lenp) { return NULL; } static inline struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) { return NULL; } static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } static inline int of_cpu_node_to_id(struct device_node *np) { return -ENODEV; } static inline struct device_node *of_get_next_cpu_node(struct device_node *prev) { return NULL; } static inline struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index) { return NULL; } static inline int of_n_addr_cells(struct device_node *np) { return 0; } static inline int of_n_size_cells(struct device_node *np) { return 0; } static inline int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { return -ENOSYS; } static inline int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { return -ENOSYS; } static inline int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { return -ENOSYS; } static inline int of_property_read_string_helper(const struct device_node *np, const char *propname, const char 
**out_strs, size_t sz, int index) { return -ENOSYS; } static inline int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args) { return -ENOSYS; } static inline int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args) { return -ENOSYS; } static inline int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name) { return -ENOSYS; } static inline ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len) { return -ENODEV; } static inline int of_request_module(const struct device_node *np) { return -ENODEV; } static inline int of_phandle_iterator_init(struct of_phandle_iterator *it, const struct device_node *np, const char *list_name, const char *cells_name, int cell_count) { return -ENOSYS; } static inline int of_phandle_iterator_next(struct of_phandle_iterator *it) { return -ENOSYS; } static inline int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size) { return 0; } static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; } static inline int of_alias_get_highest_id(const char *stem) { return -ENOSYS; } static inline int of_machine_is_compatible(const char *compat) { return 0; } static inline int of_add_property(struct device_node *np, struct property *prop) { return 0; } static inline int of_remove_property(struct device_node *np, struct property *prop) { return 0; } static inline bool of_machine_compatible_match(const char *const *compats) { return false; } static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { return false; } static inline const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { return NULL; } static inline const char *of_prop_next_string(const struct property *prop, const char *cur) { return NULL; } static inline int of_node_check_flag(struct device_node *n, unsigned long flag) { return 0; } static inline int of_node_test_and_set_flag(struct device_node *n, unsigned long flag) { return 0; } static inline void of_node_set_flag(struct device_node *n, unsigned long flag) { } static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) { } static inline int of_property_check_flag(const struct property *p, unsigned long flag) { return 0; } static inline void of_property_set_flag(struct property *p, unsigned long flag) { } static inline void of_property_clear_flag(struct property *p, unsigned long flag) { } static inline int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { return -EINVAL; } static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np) { return PHYS_ADDR_MAX; } static inline const void *of_device_get_match_data(const struct device *dev) { return NULL; } #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ /* Default string compare functions, Allow arch asm/prom.h to override */ #if !defined(of_compat_cmp) #define of_compat_cmp(s1, s2, l) strcasecmp((s1), (s2)) #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif static inline int of_prop_val_eq(const struct property *p1, const struct property *p2) { return p1->length == p2->length 
&& !memcmp(p1->value, p2->value, (size_t)p1->length); } #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; pp = pp->next) #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else static inline int of_node_to_nid(struct device_node *device) { return NUMA_NO_NODE; } #endif #ifdef CONFIG_OF_NUMA extern int of_numa_init(void); #else static inline int of_numa_init(void) { return -ENOSYS; } #endif static inline struct device_node *of_find_matching_node( struct device_node *from, const struct of_device_id *matches) { return of_find_matching_node_and_match(from, matches, NULL); } static inline const char *of_node_get_device_type(const struct device_node *np) { return of_get_property(np, "device_type", NULL); } static inline bool of_node_is_type(const struct device_node *np, const char *type) { const char *match = of_node_get_device_type(np); return np && match && type && !strcmp(match, type); } /** * of_parse_phandle - Resolve a phandle property to a device_node pointer * @np: Pointer to device node holding phandle property * @phandle_name: Name of property holding a phandle value * @index: For properties holding a table of phandles, this is the index into * the table * * Return: The device_node pointer with refcount incremented. Use * of_node_put() on it when done. */ static inline struct device_node *of_parse_phandle(const struct device_node *np, const char *phandle_name, int index) { struct of_phandle_args args; if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0, index, &args)) return NULL; return args.np; } /** * of_parse_phandle_with_args() - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * This function is useful to parse lists of phandles and their arguments. * Returns 0 on success and fills out_args, on error returns appropriate * errno value. * * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. * * Example:: * * phandle1: node1 { * #list-cells = <2>; * }; * * phandle2: node2 { * #list-cells = <1>; * }; * * node3 { * list = <&phandle1 1 2 &phandle2 3>; * }; * * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); */ static inline int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args) { int cell_count = -1; /* If cells_name is NULL we assume a cell count of 0 */ if (!cells_name) cell_count = 0; return __of_parse_phandle_with_args(np, list_name, cells_name, cell_count, index, out_args); } /** * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cell_count: number of argument cells following the phandle * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * This function is useful to parse lists of phandles and their arguments. * Returns 0 on success and fills out_args, on error returns appropriate * errno value. * * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. 
* * Example:: * * phandle1: node1 { * }; * * phandle2: node2 { * }; * * node3 { * list = <&phandle1 0 2 &phandle2 2 3>; * }; * * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args); */ static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, const char *list_name, int cell_count, int index, struct of_phandle_args *out_args) { return __of_parse_phandle_with_args(np, list_name, NULL, cell_count, index, out_args); } /** * of_parse_phandle_with_optional_args() - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * Same as of_parse_phandle_with_args() except that if the cells_name property * is not found, cell_count of 0 is assumed. * * This is useful if you have a phandle which didn't have arguments * before and thus doesn't have a '#*-cells' property but is now migrated to * having arguments while retaining backwards compatibility. */ static inline int of_parse_phandle_with_optional_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args) { return __of_parse_phandle_with_args(np, list_name, cells_name, 0, index, out_args); } /** * of_phandle_args_equal() - Compare two of_phandle_args * @a1: First of_phandle_args to compare * @a2: Second of_phandle_args to compare * * Return: True if a1 and a2 are the same (same node pointer, same phandle * args), false otherwise. */ static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, const struct of_phandle_args *a2) { return a1->np == a2->np && a1->args_count == a2->args_count && !memcmp(a1->args, a2->args, sizeof(a1->args[0]) * a1->args_count); } /** * of_property_count_u8_elems - Count the number of u8 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u8 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u8_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u8)); } /** * of_property_count_u16_elems - Count the number of u16 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u16 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u16_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u16)); } /** * of_property_count_u32_elems - Count the number of u32 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. 
* * Search for a property in a device node and count the number of u32 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u32_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u32)); } /** * of_property_count_u64_elems - Count the number of u64 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u64 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u64_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u64)); } /** * of_property_read_string_array() - Read an array of strings from a multiple * strings property. * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_strs: output array of string pointers. * @sz: number of array elements to read. * * Search for a property in a device tree node and retrieve a list of * null-terminated string values (pointer to data, not a copy) in that property. * * Return: If @out_strs is NULL, the number of strings in the property is returned. */ static inline int of_property_read_string_array(const struct device_node *np, const char *propname, const char **out_strs, size_t sz) { return of_property_read_string_helper(np, propname, out_strs, sz, 0); } /** * of_property_count_strings() - Find and return the number of strings from a * multiple strings property. * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device tree node and retrieve the number of null- * terminated strings contained in it. * * Return: The number of strings on success, -EINVAL if the property does not * exist, -ENODATA if property does not have a value, and -EILSEQ if the string * is not null-terminated within the length of the property data. */ static inline int of_property_count_strings(const struct device_node *np, const char *propname) { return of_property_read_string_helper(np, propname, NULL, 0, 0); } /** * of_property_read_string_index() - Find and read a string from a multiple * strings property. * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the string in the list of strings * @output: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy) in the list of strings * contained in that property. * * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * * The out_string pointer is modified only if a valid string can be decoded. 
*/ static inline int of_property_read_string_index(const struct device_node *np, const char *propname, int index, const char **output) { int rc = of_property_read_string_helper(np, propname, output, 1, index); return rc < 0 ? rc : 0; } /** * of_property_present - Test if a property is present in a node * @np: device node to search for the property. * @propname: name of the property to be searched. * * Test for a property present in a device node. * * Return: true if the property exists false otherwise. */ static inline bool of_property_present(const struct device_node *np, const char *propname) { struct property *prop = of_find_property(np, propname, NULL); return prop ? true : false; } /** * of_property_read_u8_array - Find and read an array of u8 from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 8-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 8 <0x50 0x60 0x70>;`` * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u8 value can be decoded. */ static inline int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz) { int ret = of_property_read_variable_u8_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } /** * of_property_read_u16_array - Find and read an array of u16 from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 16-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u16 value can be decoded. */ static inline int of_property_read_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz) { int ret = of_property_read_variable_u16_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } /** * of_property_read_u32_array - Find and read an array of 32 bit integers * from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 32-bit value(s) from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u32 value can be decoded. 
*/ static inline int of_property_read_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz) { int ret = of_property_read_variable_u32_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } /** * of_property_read_u64_array - Find and read an array of 64 bit integers * from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 64-bit value(s) from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u64 value can be decoded. */ static inline int of_property_read_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz) { int ret = of_property_read_variable_u64_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } static inline int of_property_read_u8(const struct device_node *np, const char *propname, u8 *out_value) { return of_property_read_u8_array(np, propname, out_value, 1); } static inline int of_property_read_u16(const struct device_node *np, const char *propname, u16 *out_value) { return of_property_read_u16_array(np, propname, out_value, 1); } static inline int of_property_read_u32(const struct device_node *np, const char *propname, u32 *out_value) { return of_property_read_u32_array(np, propname, out_value, 1); } static inline int of_property_read_s32(const struct device_node *np, const char *propname, s32 *out_value) { return of_property_read_u32(np, propname, (u32*) out_value); } #define of_for_each_phandle(it, err, np, ln, cn, cc) \ for (of_phandle_iterator_init((it), (np), (ln), (cn), (cc)), \ err = of_phandle_iterator_next(it); \ err == 0; \ err = of_phandle_iterator_next(it)) #define of_property_for_each_u32(np, propname, u) \ for (struct {const struct property *prop; const __be32 *item; } _it = \ {of_find_property(np, propname, NULL), \ of_prop_next_u32(_it.prop, NULL, &u)}; \ _it.item; \ _it.item = of_prop_next_u32(_it.prop, _it.item, &u)) #define of_property_for_each_string(np, propname, prop, s) \ for (prop = of_find_property(np, propname, NULL), \ s = of_prop_next_string(prop, NULL); \ s; \ s = of_prop_next_string(prop, s)) #define for_each_node_by_name(dn, name) \ for (dn = of_find_node_by_name(NULL, name); dn; \ dn = of_find_node_by_name(dn, name)) #define for_each_node_by_type(dn, type) \ for (dn = of_find_node_by_type(NULL, type); dn; \ dn = of_find_node_by_type(dn, type)) #define for_each_compatible_node(dn, type, compatible) \ for (dn = of_find_compatible_node(NULL, type, compatible); dn; \ dn = of_find_compatible_node(dn, type, compatible)) #define for_each_matching_node(dn, matches) \ for (dn = of_find_matching_node(NULL, matches); dn; \ dn = of_find_matching_node(dn, matches)) #define for_each_matching_node_and_match(dn, matches, match) \ for (dn = of_find_matching_node_and_match(NULL, matches, match); \ dn; dn = of_find_matching_node_and_match(dn, matches, match)) #define for_each_child_of_node(parent, child) \ for (child = of_get_next_child(parent, NULL); child != NULL; \ child = of_get_next_child(parent, child)) #define for_each_child_of_node_scoped(parent, child) \ for (struct device_node *child __free(device_node) = \ 
of_get_next_child(parent, NULL); \ child != NULL; \ child = of_get_next_child(parent, child)) #define for_each_child_of_node_with_prefix(parent, child, prefix) \ for (struct device_node *child __free(device_node) = \ of_get_next_child_with_prefix(parent, NULL, prefix); \ child != NULL; \ child = of_get_next_child_with_prefix(parent, child, prefix)) #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) #define for_each_reserved_child_of_node(parent, child) \ for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \ child = of_get_next_reserved_child(parent, child)) #define for_each_available_child_of_node_scoped(parent, child) \ for (struct device_node *child __free(device_node) = \ of_get_next_available_child(parent, NULL); \ child != NULL; \ child = of_get_next_available_child(parent, child)) #define for_each_of_cpu_node(cpu) \ for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \ cpu = of_get_next_cpu_node(cpu)) #define for_each_node_with_property(dn, prop_name) \ for (dn = of_find_node_with_property(NULL, prop_name); dn; \ dn = of_find_node_with_property(dn, prop_name)) static inline int of_get_child_count(const struct device_node *np) { struct device_node *child; int num = 0; for_each_child_of_node(np, child) num++; return num; } static inline int of_get_available_child_count(const struct device_node *np) { struct device_node *child; int num = 0; for_each_available_child_of_node(np, child) num++; return num; } #define _OF_DECLARE_STUB(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __attribute__((unused)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #if defined(CONFIG_OF) && !defined(MODULE) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section("__" #table "_of_table") \ __aligned(__alignof__(struct of_device_id)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else #define _OF_DECLARE(table, name, compat, fn, fn_type) \ _OF_DECLARE_STUB(table, name, compat, fn, fn_type) #endif typedef int (*of_init_fn_2)(struct device_node *, struct device_node *); typedef int (*of_init_fn_1_ret)(struct device_node *); typedef void (*of_init_fn_1)(struct device_node *); #define OF_DECLARE_1(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_1) #define OF_DECLARE_1_RET(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_1_ret) #define OF_DECLARE_2(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_2) /** * struct of_changeset_entry - Holds a changeset entry * * @node: list_head for the log list * @action: notifier action * @np: pointer to the device node affected * @prop: pointer to the property affected * @old_prop: hold a pointer to the original property * * Every modification of the device tree during a changeset * is held in a list of of_changeset_entry structures. * That way we can recover from a partial application, or we can * revert the changeset */ struct of_changeset_entry { struct list_head node; unsigned long action; struct device_node *np; struct property *prop; struct property *old_prop; }; /** * struct of_changeset - changeset tracker structure * * @entries: list_head for the changeset entries * * changesets are a convenient way to apply bulk changes to the * live tree. In case of an error, changes are rolled-back. 
* changesets live on after initial application, and if not * destroyed after use, they can be reverted in one single call. */ struct of_changeset { struct list_head entries; }; enum of_reconfig_change { OF_RECONFIG_NO_CHANGE = 0, OF_RECONFIG_CHANGE_ADD, OF_RECONFIG_CHANGE_REMOVE, }; struct notifier_block; #ifdef CONFIG_OF_DYNAMIC extern int of_reconfig_notifier_register(struct notifier_block *); extern int of_reconfig_notifier_unregister(struct notifier_block *); extern int of_reconfig_notify(unsigned long, struct of_reconfig_data *rd); extern int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *arg); extern void of_changeset_init(struct of_changeset *ocs); extern void of_changeset_destroy(struct of_changeset *ocs); extern int of_changeset_apply(struct of_changeset *ocs); extern int of_changeset_revert(struct of_changeset *ocs); extern int of_changeset_action(struct of_changeset *ocs, unsigned long action, struct device_node *np, struct property *prop); static inline int of_changeset_attach_node(struct of_changeset *ocs, struct device_node *np) { return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL); } static inline int of_changeset_detach_node(struct of_changeset *ocs, struct device_node *np) { return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL); } static inline int of_changeset_add_property(struct of_changeset *ocs, struct device_node *np, struct property *prop) { return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop); } static inline int of_changeset_remove_property(struct of_changeset *ocs, struct device_node *np, struct property *prop) { return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop); } static inline int of_changeset_update_property(struct of_changeset *ocs, struct device_node *np, struct property *prop) { return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); } struct device_node *of_changeset_create_node(struct of_changeset *ocs, struct device_node *parent, const char *full_name); int of_changeset_add_prop_string(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const char *str); int of_changeset_add_prop_string_array(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const char * const *str_array, size_t sz); int of_changeset_add_prop_u32_array(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const u32 *array, size_t sz); static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const u32 val) { return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); } int of_changeset_update_prop_string(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const char *str); int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np, const char *prop_name); #else /* CONFIG_OF_DYNAMIC */ static inline int of_reconfig_notifier_register(struct notifier_block *nb) { return -EINVAL; } static inline int of_reconfig_notifier_unregister(struct notifier_block *nb) { return -EINVAL; } static inline int of_reconfig_notify(unsigned long action, struct of_reconfig_data *arg) { return -EINVAL; } static inline int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *arg) { return -EINVAL; } #endif /* CONFIG_OF_DYNAMIC */ /** * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node * @np: Pointer to the given device_node * * Return: true if present false otherwise 
*/ static inline bool of_device_is_system_power_controller(const struct device_node *np) { return of_property_read_bool(np, "system-power-controller"); } /** * of_have_populated_dt() - Has DT been populated by bootloader * * Return: True if a DTB has been populated by the bootloader and it isn't the * empty builtin one. False otherwise. */ static inline bool of_have_populated_dt(void) { #ifdef CONFIG_OF return of_property_present(of_root, "compatible"); #else return false; #endif } /* * Overlay support */ enum of_overlay_notify_action { OF_OVERLAY_INIT = 0, /* kzalloc() of ovcs sets this value */ OF_OVERLAY_PRE_APPLY, OF_OVERLAY_POST_APPLY, OF_OVERLAY_PRE_REMOVE, OF_OVERLAY_POST_REMOVE, }; static inline const char *of_overlay_action_name(enum of_overlay_notify_action action) { static const char *const of_overlay_action_name[] = { "init", "pre-apply", "post-apply", "pre-remove", "post-remove", }; return of_overlay_action_name[action]; } struct of_overlay_notify_data { struct device_node *overlay; struct device_node *target; }; #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, int *ovcs_id, const struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); int of_overlay_notifier_register(struct notifier_block *nb); int of_overlay_notifier_unregister(struct notifier_block *nb); #else static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, int *ovcs_id, const struct device_node *target_base) { return -ENOTSUPP; } static inline int of_overlay_remove(int *ovcs_id) { return -ENOTSUPP; } static inline int of_overlay_remove_all(void) { return -ENOTSUPP; } static inline int of_overlay_notifier_register(struct notifier_block *nb) { return 0; } static inline int of_overlay_notifier_unregister(struct notifier_block *nb) { return 0; } #endif #endif /* _LINUX_OF_H */
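A minimal sketch of driving the changeset API declared above from kernel code, assuming CONFIG_OF_DYNAMIC is enabled; the property names and values are hypothetical and error handling is trimmed. Keeping the changeset object alive after a successful apply is what allows the single-call revert mentioned in the struct of_changeset comment.

#include <linux/of.h>

static int example_label_node(struct of_changeset *ocs, struct device_node *np)
{
        int ret;

        of_changeset_init(ocs);

        /* Entries are only recorded here; the live tree is untouched. */
        ret = of_changeset_add_prop_string(ocs, np, "example-label", "demo");
        if (!ret)
                ret = of_changeset_add_prop_u32(ocs, np, "example-cells", 2);

        /* Apply is all-or-nothing: on failure the already-applied
         * entries are rolled back. */
        if (!ret)
                ret = of_changeset_apply(ocs);
        if (ret)
                of_changeset_destroy(ocs);      /* drop recorded entries */
        return ret;
}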
// SPDX-License-Identifier: GPL-2.0
/*
 * FPU register's regset abstraction, for ptrace, core dumps, etc.
 */
#include <linux/sched/task_stack.h>
#include <linux/vmalloc.h>

#include <asm/fpu/api.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/prctl.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

/*
 * The xstateregs_active() routine is the same as the regset_fpregs_active()
 * routine, as the "regset->n" for the xstate regset will be updated based
 * on the feature capabilities supported by the xsave.
 */
int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
        return regset->n;
}

int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
        if (boot_cpu_has(X86_FEATURE_FXSR))
                return regset->n;
        else
                return 0;
}

/*
 * The regset get() functions are invoked from:
 *
 *   - coredump to dump the current task's fpstate. If the current task
 *     owns the FPU then the memory state has to be synchronized and the
 *     FPU register state preserved. Otherwise fpstate is already in sync.
 *
 *   - ptrace to dump fpstate of a stopped task, in which case the registers
 *     have already been saved to fpstate on context switch.
 */
static void sync_fpstate(struct fpu *fpu)
{
        if (fpu == &current->thread.fpu)
                fpu_sync_fpstate(fpu);
}

/*
 * Invalidate cached FPU registers before modifying the stopped target
 * task's fpstate.
 *
 * This forces the target task on resume to restore the FPU registers from
 * modified fpstate. Otherwise the task might skip the restore and operate
 * with the cached FPU registers which discards the modifications.
*/ static void fpu_force_restore(struct fpu *fpu) { /* * Only stopped child tasks can be used to modify the FPU * state in the fpstate buffer: */ WARN_ON_FPU(fpu == &current->thread.fpu); __fpu_invalidate_fpregs_state(fpu); } int xfpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct fpu *fpu = &target->thread.fpu; if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; sync_fpstate(fpu); if (!use_xsave()) { return membuf_write(&to, &fpu->fpstate->regs.fxsave, sizeof(fpu->fpstate->regs.fxsave)); } copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX); return 0; } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; struct fxregs_state newstate; int ret; if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; /* No funny business with partial or oversized writes is permitted. */ if (pos != 0 || count != sizeof(newstate)) return -EINVAL; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) return ret; /* Do not allow an invalid MXCSR value. */ if (newstate.mxcsr & ~mxcsr_feature_mask) return -EINVAL; fpu_force_restore(fpu); /* Copy the state */ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); /* Clear xmm8..15 for 32-bit callers */ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); if (in_ia32_syscall()) memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; return 0; } int xstateregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; sync_fpstate(&target->thread.fpu); copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE); return 0; } int xstateregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; struct xregs_state *tmpbuf = NULL; int ret; if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; /* * A whole standard-format XSAVE buffer is needed: */ if (pos != 0 || count != fpu_user_cfg.max_size) return -EFAULT; if (!kbuf) { tmpbuf = vmalloc(count); if (!tmpbuf) return -ENOMEM; if (copy_from_user(tmpbuf, ubuf, count)) { ret = -EFAULT; goto out; } } fpu_force_restore(fpu); ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru); out: vfree(tmpbuf); return ret; } #ifdef CONFIG_X86_USER_SHADOW_STACK int ssp_active(struct task_struct *target, const struct user_regset *regset) { if (target->thread.features & ARCH_SHSTK_SHSTK) return regset->n; return 0; } int ssp_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct fpu *fpu = &target->thread.fpu; struct cet_user_state *cetregs; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || !ssp_active(target, regset)) return -ENODEV; sync_fpstate(fpu); cetregs = get_xsave_addr(&fpu->fpstate->regs.xsave, XFEATURE_CET_USER); if (WARN_ON(!cetregs)) { /* * This shouldn't ever be NULL because shadow stack was * verified to be enabled above. This means * MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so * XFEATURE_CET_USER should not be in the init state. 
*/ return -ENODEV; } return membuf_write(&to, (unsigned long *)&cetregs->user_ssp, sizeof(cetregs->user_ssp)); } int ssp_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; struct xregs_state *xsave = &fpu->fpstate->regs.xsave; struct cet_user_state *cetregs; unsigned long user_ssp; int r; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || !ssp_active(target, regset)) return -ENODEV; if (pos != 0 || count != sizeof(user_ssp)) return -EINVAL; r = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_ssp, 0, -1); if (r) return r; /* * Some kernel instructions (IRET, etc) can cause exceptions in the case * of disallowed CET register values. Just prevent invalid values. */ if (user_ssp >= TASK_SIZE_MAX || !IS_ALIGNED(user_ssp, 8)) return -EINVAL; fpu_force_restore(fpu); cetregs = get_xsave_addr(xsave, XFEATURE_CET_USER); if (WARN_ON(!cetregs)) { /* * This shouldn't ever be NULL because shadow stack was * verified to be enabled above. This means * MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so * XFEATURE_CET_USER should not be in the init state. */ return -ENODEV; } cetregs->user_ssp = user_ssp; return 0; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION /* * FPU tag word conversions. */ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) { unsigned int tmp; /* to avoid 16 bit prefixes in the code */ /* Transform each pair of bits into 01 (valid) or 00 (empty) */ tmp = ~twd; tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ /* and move the valid bits to the lower byte. */ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ return tmp; } #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) #define FP_EXP_TAG_VALID 0 #define FP_EXP_TAG_ZERO 1 #define FP_EXP_TAG_SPECIAL 2 #define FP_EXP_TAG_EMPTY 3 static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave) { struct _fpxreg *st; u32 tos = (fxsave->swd >> 11) & 7; u32 twd = (unsigned long) fxsave->twd; u32 tag; u32 ret = 0xffff0000u; int i; for (i = 0; i < 8; i++, twd >>= 1) { if (twd & 0x1) { st = FPREG_ADDR(fxsave, (i - tos) & 7); switch (st->exponent & 0x7fff) { case 0x7fff: tag = FP_EXP_TAG_SPECIAL; break; case 0x0000: if (!st->significand[0] && !st->significand[1] && !st->significand[2] && !st->significand[3]) tag = FP_EXP_TAG_ZERO; else tag = FP_EXP_TAG_SPECIAL; break; default: if (st->significand[3] & 0x8000) tag = FP_EXP_TAG_VALID; else tag = FP_EXP_TAG_SPECIAL; break; } } else { tag = FP_EXP_TAG_EMPTY; } ret |= tag << (2 * i); } return ret; } /* * FXSR floating point environment conversions. */ static void __convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk, struct fxregs_state *fxsave) { struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; env->cwd = fxsave->cwd | 0xffff0000u; env->swd = fxsave->swd | 0xffff0000u; env->twd = twd_fxsr_to_i387(fxsave); #ifdef CONFIG_X86_64 env->fip = fxsave->rip; env->foo = fxsave->rdp; /* * should be actually ds/cs at fpu exception time, but * that information is not available in 64bit mode. 
*/ env->fcs = task_pt_regs(tsk)->cs; if (tsk == current) { savesegment(ds, env->fos); } else { env->fos = tsk->thread.ds; } env->fos |= 0xffff0000; #else env->fip = fxsave->fip; env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); env->foo = fxsave->foo; env->fos = fxsave->fos; #endif for (i = 0; i < 8; ++i) memcpy(&to[i], &from[i], sizeof(to[0])); } void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave); } void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env) { struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; fxsave->cwd = env->cwd; fxsave->swd = env->swd; fxsave->twd = twd_i387_to_fxsr(env->twd); fxsave->fop = (u16) ((u32) env->fcs >> 16); #ifdef CONFIG_X86_64 fxsave->rip = env->fip; fxsave->rdp = env->foo; /* cs and ds ignored */ #else fxsave->fip = env->fip; fxsave->fcs = (env->fcs & 0xffff); fxsave->foo = env->foo; fxsave->fos = env->fos; #endif for (i = 0; i < 8; ++i) memcpy(&to[i], &from[i], sizeof(from[0])); } int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; struct fxregs_state fxsave, *fx; sync_fpstate(fpu); if (!cpu_feature_enabled(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, to); if (!cpu_feature_enabled(X86_FEATURE_FXSR)) { return membuf_write(&to, &fpu->fpstate->regs.fsave, sizeof(struct fregs_state)); } if (use_xsave()) { struct membuf mb = { .p = &fxsave, .left = sizeof(fxsave) }; /* Handle init state optimized xstate correctly */ copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP); fx = &fxsave; } else { fx = &fpu->fpstate->regs.fxsave; } __convert_from_fxsr(&env, target, fx); return membuf_write(&to, &env, sizeof(env)); } int fpregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; int ret; /* No funny business with partial or oversized writes is permitted. */ if (pos != 0 || count != sizeof(struct user_i387_ia32_struct)) return -EINVAL; if (!cpu_feature_enabled(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); if (ret) return ret; fpu_force_restore(fpu); if (cpu_feature_enabled(X86_FEATURE_FXSR)) convert_to_fxsr(&fpu->fpstate->regs.fxsave, &env); else memcpy(&fpu->fpstate->regs.fsave, &env, sizeof(env)); /* * Update the header bit in the xsave header, indicating the * presence of FP. */ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FP; return 0; } #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
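A userspace sketch of the consumer side of these handlers: PTRACE_GETREGSET with NT_PRFPREG on a stopped child is serviced on x86-64 by xfpregs_get() above (via sync_fpstate()). This assumes an x86-64 glibc target, where struct user_fpregs_struct matches the FXSAVE layout; error handling is trimmed for brevity.

#include <elf.h>
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid = fork();

        if (pid == 0) {
                /* Child: let the parent trace us, then stop. */
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                raise(SIGSTOP);
                _exit(0);
        }
        waitpid(pid, NULL, 0);          /* child is now stopped */

        struct user_fpregs_struct fp;
        struct iovec iov = { .iov_base = &fp, .iov_len = sizeof(fp) };

        /* On x86-64 this request ends up in xfpregs_get() above. */
        if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRFPREG, &iov) == 0)
                printf("cwd=%#hx swd=%#hx mxcsr=%#x\n", fp.cwd, fp.swd, fp.mxcsr);

        kill(pid, SIGKILL);
        return 0;
}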
/*
 * linux/fs/nls/nls_cp852.c
 *
 * Charset cp852 translation tables.
 * Generated automatically from the Unicode and charset
 * tables from the Unicode Organization (www.unicode.org).
 * The Unicode to charset table has only exact mappings.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
#include <linux/errno.h>

static const wchar_t charset2uni[256] = {
        /* 0x00*/
        0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
        0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
        /* 0x10*/
        0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
        0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
        /* 0x20*/
        0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
        0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
        /* 0x30*/
        0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
        0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
        /* 0x40*/
        0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
        0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
        /* 0x50*/
        0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
        0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
        /* 0x60*/
        0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
        0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
        /* 0x70*/
        0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
        0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
        /* 0x80*/
        0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x016f, 0x0107, 0x00e7,
        0x0142, 0x00eb, 0x0150, 0x0151, 0x00ee, 0x0179, 0x00c4, 0x0106,
        /* 0x90*/
        0x00c9, 0x0139, 0x013a, 0x00f4, 0x00f6, 0x013d, 0x013e, 0x015a,
        0x015b, 0x00d6, 0x00dc, 0x0164, 0x0165, 0x0141, 0x00d7, 0x010d,
        /* 0xa0*/
        0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x0104, 0x0105, 0x017d, 0x017e,
        0x0118, 0x0119, 0x00ac, 0x017a, 0x010c, 0x015f, 0x00ab, 0x00bb,
        /* 0xb0*/
        0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x011a,
        0x015e, 0x2563, 0x2551, 0x2557, 0x255d, 0x017b, 0x017c, 0x2510,
        /* 0xc0*/
        0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x0102, 0x0103,
        0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
        /* 0xd0*/
        0x0111, 0x0110, 0x010e, 0x00cb, 0x010f, 0x0147, 0x00cd, 0x00ce,
        0x011b, 0x2518,
0x250c, 0x2588, 0x2584, 0x0162, 0x016e, 0x2580, /* 0xe0*/ 0x00d3, 0x00df, 0x00d4, 0x0143, 0x0144, 0x0148, 0x0160, 0x0161, 0x0154, 0x00da, 0x0155, 0x0170, 0x00fd, 0x00dd, 0x0163, 0x00b4, /* 0xf0*/ 0x00ad, 0x02dd, 0x02db, 0x02c7, 0x02d8, 0x00a7, 0x00f7, 0x00b8, 0x00b0, 0x00a8, 0x02d9, 0x0171, 0x0158, 0x0159, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0x00, 0x00, 0x00, 0xcf, 0x00, 0x00, 0xf5, /* 0xa0-0xa7 */ 0xf9, 0x00, 0x00, 0xae, 0xaa, 0xf0, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0x00, 0x00, 0x00, 0xef, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0xf7, 0x00, 0x00, 0xaf, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0xb5, 0xb6, 0x00, 0x8e, 0x00, 0x00, 0x80, /* 0xc0-0xc7 */ 0x00, 0x90, 0x00, 0xd3, 0x00, 0xd6, 0xd7, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0xe0, 0xe2, 0x00, 0x99, 0x9e, /* 0xd0-0xd7 */ 0x00, 0x00, 0xe9, 0x00, 0x9a, 0xed, 0x00, 0xe1, /* 0xd8-0xdf */ 0x00, 0xa0, 0x83, 0x00, 0x84, 0x00, 0x00, 0x87, /* 0xe0-0xe7 */ 0x00, 0x82, 0x00, 0x89, 0x00, 0xa1, 0x8c, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0xa2, 0x93, 0x00, 0x94, 0xf6, /* 0xf0-0xf7 */ 0x00, 0x00, 0xa3, 0x00, 0x81, 0xec, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0xc6, 0xc7, 0xa4, 0xa5, 0x8f, 0x86, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xac, 0x9f, 0xd2, 0xd4, /* 0x08-0x0f */ 0xd1, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xa8, 0xa9, 0xb7, 0xd8, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x91, 0x92, 0x00, 0x00, 0x95, 0x96, 0x00, /* 0x38-0x3f */ 0x00, 0x9d, 0x88, 0xe3, 0xe4, 0x00, 0x00, 0xd5, /* 0x40-0x47 */ 0xe5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x8a, 0x8b, 0x00, 0x00, 0xe8, 0xea, 0x00, 0x00, /* 0x50-0x57 */ 0xfc, 0xfd, 0x97, 0x98, 0x00, 0x00, 0xb8, 0xad, /* 0x58-0x5f */ 0xe6, 0xe7, 0xdd, 0xee, 0x9b, 0x9c, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0x85, /* 0x68-0x6f */ 0xeb, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x8d, 0xab, 0xbd, 0xbe, 0xa6, 0xa7, 0x00, /* 0x78-0x7f */ }; static const unsigned char page02[256] = { 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf3, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0xf4, 0xfa, 0x00, 0xf2, 0x00, 0xf1, 0x00, 0x00, /* 0xd8-0xdf */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0x00, 0x00, 0xc9, 0x00, 0x00, 0xbb, /* 0x50-0x57 */ 0x00, 0x00, 0xc8, 0x00, 0x00, 0xbc, 0x00, 0x00, /* 0x58-0x5f */ 0xcc, 0x00, 0x00, 0xb9, 0x00, 0x00, 0xcb, 0x00, /* 0x60-0x67 */ 0x00, 0xca, 0x00, 0x00, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, page02, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 
0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x87, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8b, 0x8b, 0x8c, 0xab, 0x84, 0x86, /* 0x88-0x8f */ 0x82, 0x92, 0x92, 0x93, 0x94, 0x96, 0x96, 0x98, /* 0x90-0x97 */ 0x98, 0x94, 0x81, 0x9c, 0x9c, 0x88, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa5, 0xa5, 0xa7, 0xa7, /* 0xa0-0xa7 */ 0xa9, 0xa9, 0xaa, 0xab, 0x9f, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xa0, 0x83, 0xd8, /* 0xb0-0xb7 */ 0xad, 0xb9, 0xba, 0xbb, 0xbc, 0xbe, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc7, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd0, 0xd4, 0x89, 0xd4, 0xe5, 0xa1, 0x8c, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xee, 0x85, 0xdf, /* 0xd8-0xdf */ 0xa2, 0xe1, 0x93, 0xe4, 0xe4, 0xe5, 0xe7, 0xe7, /* 0xe0-0xe7 */ 0xea, 0xa3, 0xea, 0xfb, 0xec, 0xec, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfd, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x9a, 0x90, 0xb6, 0x8e, 0xde, 0x8f, 0x80, /* 0x80-0x87 */ 0x9d, 0xd3, 0x8a, 0x8a, 0xd7, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x91, 0xe2, 0x99, 0x95, 0x95, 0x97, /* 0x90-0x97 */ 0x97, 0x99, 0x9a, 0x9b, 0x9b, 0x9d, 0x9e, 0xac, /* 0x98-0x9f */ 0xb5, 0xd6, 0xe0, 0xe9, 0xa4, 0xa4, 0xa6, 0xa6, /* 0xa0-0xa7 */ 0xa8, 0xa8, 0xaa, 0x8d, 0xac, 0xb8, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 
0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbd, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd1, 0xd1, 0xd2, 0xd3, 0xd2, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xb7, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe3, 0xd5, 0xe6, 0xe6, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xe8, 0xeb, 0xed, 0xed, 0xdd, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xeb, 0xfc, 0xfc, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp852", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp852(void) { return register_nls(&table); } static void __exit exit_nls_cp852(void) { unregister_nls(&table); } module_init(init_nls_cp852) module_exit(exit_nls_cp852) MODULE_DESCRIPTION("NLS Codepage 852 (Central/Eastern Europe)"); MODULE_LICENSE("Dual BSD/GPL");
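A standalone illustration (not kernel code) of the two-level lookup that uni2char() above performs: the high byte of the character selects a 256-entry page table, the low byte indexes into it. The demo_* names are made up for this sketch; the miniature table keeps only the U+0107 -> 0x86 entry from page01 above.

#include <stdio.h>

static const unsigned char demo_page01[256] = {
        [0x07] = 0x86,  /* U+0107 (c with acute) -> 0x86 in cp852 */
};
static const unsigned char *const demo_pages[256] = {
        [0x01] = demo_page01,
};

static int demo_uni2char(unsigned short uni, unsigned char *out)
{
        const unsigned char *page = demo_pages[(uni >> 8) & 0xff];

        /* A NULL page or a zero entry means "no mapping" (the kernel
         * routine returns -EINVAL in that case). */
        if (page && page[uni & 0xff]) {
                *out = page[uni & 0xff];
                return 1;
        }
        return -1;
}

int main(void)
{
        unsigned char c;

        if (demo_uni2char(0x0107, &c) == 1)
                printf("U+0107 -> 0x%02x\n", c);        /* prints 0x86 */
        return 0;
}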
// SPDX-License-Identifier: GPL-2.0-only
/*
 * vivid-vbi-cap.c - vbi capture support functions.
 *
 * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/videodev2.h>
#include <media/v4l2-common.h>

#include "vivid-core.h"
#include "vivid-kthread-cap.h"
#include "vivid-vbi-cap.h"
#include "vivid-vbi-gen.h"
#include "vivid-vid-common.h"

static void vivid_sliced_vbi_cap_fill(struct vivid_dev *dev, unsigned seqnr)
{
        struct vivid_vbi_gen_data *vbi_gen = &dev->vbi_gen;
        bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60;

        vivid_vbi_gen_sliced(vbi_gen, is_60hz, seqnr);

        if (!is_60hz) {
                if (vivid_vid_can_loop(dev)) {
                        if (dev->vbi_out_have_wss) {
                                vbi_gen->data[12].data[0] = dev->vbi_out_wss[0];
                                vbi_gen->data[12].data[1] = dev->vbi_out_wss[1];
                        } else {
                                vbi_gen->data[12].id = 0;
                        }
                } else {
                        switch (tpg_g_video_aspect(&dev->tpg)) {
                        case TPG_VIDEO_ASPECT_14X9_CENTRE:
                                vbi_gen->data[12].data[0] = 0x01;
                                break;
                        case TPG_VIDEO_ASPECT_16X9_CENTRE:
                                vbi_gen->data[12].data[0] = 0x0b;
                                break;
                        case TPG_VIDEO_ASPECT_16X9_ANAMORPHIC:
                                vbi_gen->data[12].data[0] = 0x07;
                                break;
                        case TPG_VIDEO_ASPECT_4X3:
                        default:
                                vbi_gen->data[12].data[0] = 0x08;
                                break;
                        }
                }
        } else if (vivid_vid_can_loop(dev) && is_60hz) {
                if (dev->vbi_out_have_cc[0]) {
                        vbi_gen->data[0].data[0] = dev->vbi_out_cc[0][0];
                        vbi_gen->data[0].data[1] = dev->vbi_out_cc[0][1];
                } else {
                        vbi_gen->data[0].id = 0;
                }
                if (dev->vbi_out_have_cc[1]) {
                        vbi_gen->data[1].data[0] = dev->vbi_out_cc[1][0];
                        vbi_gen->data[1].data[1] = dev->vbi_out_cc[1][1];
                } else {
                        vbi_gen->data[1].id = 0;
                }
        }
}

static void vivid_g_fmt_vbi_cap(struct vivid_dev *dev, struct v4l2_vbi_format *vbi)
{
        bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60;

        vbi->sampling_rate = 27000000;
        vbi->offset = 24;
        vbi->samples_per_line = 1440;
        vbi->sample_format = V4L2_PIX_FMT_GREY;
        vbi->start[0] = is_60hz ? V4L2_VBI_ITU_525_F1_START + 9 : V4L2_VBI_ITU_625_F1_START + 5;
        vbi->start[1] = is_60hz ? V4L2_VBI_ITU_525_F2_START + 9 : V4L2_VBI_ITU_625_F2_START + 5;
        vbi->count[0] = vbi->count[1] = is_60hz ?
12 : 18; vbi->flags = dev->vbi_cap_interlaced ? V4L2_VBI_INTERLACED : 0; vbi->reserved[0] = 0; vbi->reserved[1] = 0; } void vivid_raw_vbi_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { struct v4l2_vbi_format vbi; u8 *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); vivid_g_fmt_vbi_cap(dev, &vbi); buf->vb.sequence = dev->vbi_cap_seq_count; if (dev->field_cap == V4L2_FIELD_ALTERNATE) buf->vb.sequence /= 2; vivid_sliced_vbi_cap_fill(dev, buf->vb.sequence); memset(vbuf, 0x10, vb2_plane_size(&buf->vb.vb2_buf, 0)); if (!VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) vivid_vbi_gen_raw(&dev->vbi_gen, &vbi, vbuf); } void vivid_sliced_vbi_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { struct v4l2_sliced_vbi_data *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); buf->vb.sequence = dev->vbi_cap_seq_count; if (dev->field_cap == V4L2_FIELD_ALTERNATE) buf->vb.sequence /= 2; vivid_sliced_vbi_cap_fill(dev, buf->vb.sequence); memset(vbuf, 0, vb2_plane_size(&buf->vb.vb2_buf, 0)); if (!VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) { unsigned i; for (i = 0; i < 25; i++) vbuf[i] = dev->vbi_gen.data[i]; } } static int vbi_cap_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { struct vivid_dev *dev = vb2_get_drv_priv(vq); bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; unsigned size = vq->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ? 36 * sizeof(struct v4l2_sliced_vbi_data) : 1440 * 2 * (is_60hz ? 12 : 18); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; if (*nplanes) return sizes[0] < size ? -EINVAL : 0; sizes[0] = size; *nplanes = 1; return 0; } static int vbi_cap_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; unsigned size = vb->vb2_queue->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ? 36 * sizeof(struct v4l2_sliced_vbi_data) : 1440 * 2 * (is_60hz ? 12 : 18); dprintk(dev, 1, "%s\n", __func__); if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. 
*/ dev->buf_prepare_error = false; return -EINVAL; } if (vb2_plane_size(vb, 0) < size) { dprintk(dev, 1, "%s data will not fit into plane (%lu < %u)\n", __func__, vb2_plane_size(vb, 0), size); return -EINVAL; } vb2_set_plane_payload(vb, 0, size); return 0; } static void vbi_cap_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->vbi_cap_active); spin_unlock(&dev->slock); } static int vbi_cap_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); int err; dprintk(dev, 1, "%s\n", __func__); dev->vbi_cap_seq_count = 0; if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else { err = vivid_start_generating_vid_cap(dev, &dev->vbi_cap_streaming); } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->vbi_cap_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void vbi_cap_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); dprintk(dev, 1, "%s\n", __func__); vivid_stop_generating_vid_cap(dev, &dev->vbi_cap_streaming); } static void vbi_cap_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_vbi_cap); } const struct vb2_ops vivid_vbi_cap_qops = { .queue_setup = vbi_cap_queue_setup, .buf_prepare = vbi_cap_buf_prepare, .buf_queue = vbi_cap_buf_queue, .start_streaming = vbi_cap_start_streaming, .stop_streaming = vbi_cap_stop_streaming, .buf_request_complete = vbi_cap_buf_request_complete, }; int vidioc_g_fmt_vbi_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_vbi_format *vbi = &f->fmt.vbi; if (!vivid_is_sdtv_cap(dev) || !dev->has_raw_vbi_cap) return -EINVAL; vivid_g_fmt_vbi_cap(dev, vbi); return 0; } int vidioc_s_fmt_vbi_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); int ret = vidioc_g_fmt_vbi_cap(file, priv, f); if (ret) return ret; if (f->type != V4L2_BUF_TYPE_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; return 0; } void vivid_fill_service_lines(struct v4l2_sliced_vbi_format *vbi, u32 service_set) { vbi->io_size = sizeof(struct v4l2_sliced_vbi_data) * 36; vbi->service_set = service_set; memset(vbi->service_lines, 0, sizeof(vbi->service_lines)); memset(vbi->reserved, 0, sizeof(vbi->reserved)); if (vbi->service_set == 0) return; if (vbi->service_set & V4L2_SLICED_CAPTION_525) { vbi->service_lines[0][21] = V4L2_SLICED_CAPTION_525; vbi->service_lines[1][21] = V4L2_SLICED_CAPTION_525; } if (vbi->service_set & V4L2_SLICED_WSS_625) { unsigned i; for (i = 7; i <= 18; i++) vbi->service_lines[0][i] = vbi->service_lines[1][i] = V4L2_SLICED_TELETEXT_B; vbi->service_lines[0][23] = V4L2_SLICED_WSS_625; } } int vidioc_g_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap) return -EINVAL; vivid_fill_service_lines(vbi, dev->service_set_cap); return 0; } int 
vidioc_try_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; u32 service_set = vbi->service_set; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap) return -EINVAL; service_set &= is_60hz ? V4L2_SLICED_CAPTION_525 : V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; vivid_fill_service_lines(vbi, service_set); return 0; } int vidioc_s_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; int ret = vidioc_try_fmt_sliced_vbi_cap(file, fh, fmt); if (ret) return ret; if (fmt->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; dev->service_set_cap = vbi->service_set; return 0; } int vidioc_g_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_sliced_vbi_cap *cap) { struct vivid_dev *dev = video_drvdata(file); struct video_device *vdev = video_devdata(file); bool is_60hz; if (vdev->vfl_dir == VFL_DIR_RX) { is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap || cap->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE) return -EINVAL; } else { is_60hz = dev->std_out & V4L2_STD_525_60; if (!vivid_is_svid_out(dev) || !dev->has_sliced_vbi_out || cap->type != V4L2_BUF_TYPE_SLICED_VBI_OUTPUT) return -EINVAL; } cap->service_set = is_60hz ? V4L2_SLICED_CAPTION_525 : V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; if (is_60hz) { cap->service_lines[0][21] = V4L2_SLICED_CAPTION_525; cap->service_lines[1][21] = V4L2_SLICED_CAPTION_525; } else { unsigned i; for (i = 7; i <= 18; i++) cap->service_lines[0][i] = cap->service_lines[1][i] = V4L2_SLICED_TELETEXT_B; cap->service_lines[0][23] = V4L2_SLICED_WSS_625; } return 0; }
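A userspace sketch of exercising the sliced VBI format handlers above: VIDIOC_S_FMT lands in vidioc_s_fmt_sliced_vbi_cap(), which (via the try handler) clamps service_set to the services valid for the current standard. The device node path is hypothetical (vivid registers /dev/vbiX nodes); error handling is trimmed.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/videodev2.h>

int main(void)
{
        int fd = open("/dev/vbi0", O_RDWR);     /* hypothetical vivid vbi node */
        struct v4l2_format fmt;

        if (fd < 0)
                return 1;

        memset(&fmt, 0, sizeof(fmt));
        fmt.type = V4L2_BUF_TYPE_SLICED_VBI_CAPTURE;
        /* Request 625-line services; on a 60 Hz standard the driver
         * reduces this to V4L2_SLICED_CAPTION_525 instead. */
        fmt.fmt.sliced.service_set = V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B;

        if (ioctl(fd, VIDIOC_S_FMT, &fmt) == 0)
                printf("granted services: %#x, io_size=%u\n",
                       fmt.fmt.sliced.service_set, fmt.fmt.sliced.io_size);

        close(fd);
        return 0;
}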
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * HID support for Linux
 *
 * Copyright (c) 1999 Andreas Gal
 * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
 * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
 * Copyright (c) 2006-2012 Jiri Kosina
 */

/*
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/unaligned.h>
#include <asm/byteorder.h>
#include <linux/input.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/semaphore.h>

#include <linux/hid.h>
#include <linux/hiddev.h>
#include <linux/hid-debug.h>
#include <linux/hidraw.h>

#include "hid-ids.h"

/*
 * Version Information
 */

#define DRIVER_DESC "HID core driver"

static int hid_ignore_special_drivers = 0;
module_param_named(ignore_special_drivers, hid_ignore_special_drivers, int, 0600);
MODULE_PARM_DESC(ignore_special_drivers, "Ignore any special drivers and handle all devices by generic driver");

/*
 * Convert a signed n-bit integer to signed 32-bit integer.
 */
static s32 snto32(__u32 value, unsigned int n)
{
        if (!value || !n)
                return 0;

        if (n > 32)
                n = 32;

        return sign_extend32(value, n - 1);
}

/*
 * Convert a signed 32-bit integer to a signed n-bit integer.
 */
static u32 s32ton(__s32 value, unsigned int n)
{
        s32 a = value >> (n - 1);

        if (a && a != -1)
                return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1;

        return value & ((1 << n) - 1);
}

/*
 * Register a new report for a device.
*/ struct hid_report *hid_register_report(struct hid_device *device, enum hid_report_type type, unsigned int id, unsigned int application) { struct hid_report_enum *report_enum = device->report_enum + type; struct hid_report *report; if (id >= HID_MAX_IDS) return NULL; if (report_enum->report_id_hash[id]) return report_enum->report_id_hash[id]; report = kzalloc(sizeof(struct hid_report), GFP_KERNEL); if (!report) return NULL; if (id != 0) report_enum->numbered = 1; report->id = id; report->type = type; report->size = 0; report->device = device; report->application = application; report_enum->report_id_hash[id] = report; list_add_tail(&report->list, &report_enum->report_list); INIT_LIST_HEAD(&report->field_entry_list); return report; } EXPORT_SYMBOL_GPL(hid_register_report); /* * Register a new field for this report. */ static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages) { struct hid_field *field; if (report->maxfield == HID_MAX_FIELDS) { hid_err(report->device, "too many fields in report\n"); return NULL; } field = kvzalloc((sizeof(struct hid_field) + usages * sizeof(struct hid_usage) + 3 * usages * sizeof(unsigned int)), GFP_KERNEL); if (!field) return NULL; field->index = report->maxfield++; report->field[field->index] = field; field->usage = (struct hid_usage *)(field + 1); field->value = (s32 *)(field->usage + usages); field->new_value = (s32 *)(field->value + usages); field->usages_priorities = (s32 *)(field->new_value + usages); field->report = report; return field; } /* * Open a collection. The type/usage is pushed on the stack. */ static int open_collection(struct hid_parser *parser, unsigned type) { struct hid_collection *collection; unsigned usage; int collection_index; usage = parser->local.usage[0]; if (parser->collection_stack_ptr == parser->collection_stack_size) { unsigned int *collection_stack; unsigned int new_size = parser->collection_stack_size + HID_COLLECTION_STACK_SIZE; collection_stack = krealloc(parser->collection_stack, new_size * sizeof(unsigned int), GFP_KERNEL); if (!collection_stack) return -ENOMEM; parser->collection_stack = collection_stack; parser->collection_stack_size = new_size; } if (parser->device->maxcollection == parser->device->collection_size) { collection = kmalloc( array3_size(sizeof(struct hid_collection), parser->device->collection_size, 2), GFP_KERNEL); if (collection == NULL) { hid_err(parser->device, "failed to reallocate collection array\n"); return -ENOMEM; } memcpy(collection, parser->device->collection, sizeof(struct hid_collection) * parser->device->collection_size); memset(collection + parser->device->collection_size, 0, sizeof(struct hid_collection) * parser->device->collection_size); kfree(parser->device->collection); parser->device->collection = collection; parser->device->collection_size *= 2; } parser->collection_stack[parser->collection_stack_ptr++] = parser->device->maxcollection; collection_index = parser->device->maxcollection++; collection = parser->device->collection + collection_index; collection->type = type; collection->usage = usage; collection->level = parser->collection_stack_ptr - 1; collection->parent_idx = (collection->level == 0) ? -1 : parser->collection_stack[collection->level - 1]; if (type == HID_COLLECTION_APPLICATION) parser->device->maxapplication++; return 0; } /* * Close a collection. */ static int close_collection(struct hid_parser *parser) { if (!parser->collection_stack_ptr) { hid_err(parser->device, "collection stack underflow\n"); return -EINVAL; }
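A quick userspace check of the sign extension snto32() relies on: sign_extend32(value, n - 1) treats bit n-1 as the sign bit and copies it up through bit 31. The shift-pair below is an equivalent formulation (it assumes arithmetic right shift of signed values, as gcc and clang provide).

#include <stdio.h>
#include <stdint.h>

static int32_t demo_snto32(uint32_t value, unsigned int n)
{
        if (!value || !n)
                return 0;
        if (n > 32)
                n = 32;
        /* Move the n-bit field's sign bit up to bit 31, then
         * arithmetic-shift back down to replicate it. */
        return (int32_t)(value << (32 - n)) >> (32 - n);
}

int main(void)
{
        printf("%d\n", demo_snto32(0xff, 8));   /* -1   */
        printf("%d\n", demo_snto32(0x7f, 8));   /* 127  */
        printf("%d\n", demo_snto32(0x80, 8));   /* -128 */
        return 0;
}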