// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext4/xattr_trusted.c
 * Handler for trusted extended attributes.
 *
 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

#include <linux/string.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"

static bool
ext4_xattr_trusted_list(struct dentry *dentry)
{
	return capable(CAP_SYS_ADMIN);
}

static int
ext4_xattr_trusted_get(const struct xattr_handler *handler,
		       struct dentry *unused, struct inode *inode,
		       const char *name, void *buffer, size_t size)
{
	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED,
			      name, buffer, size);
}

static int
ext4_xattr_trusted_set(const struct xattr_handler *handler,
		       struct mnt_idmap *idmap,
		       struct dentry *unused, struct inode *inode,
		       const char *name, const void *value,
		       size_t size, int flags)
{
	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED,
			      name, value, size, flags);
}

const struct xattr_handler ext4_xattr_trusted_handler = {
	.prefix	= XATTR_TRUSTED_PREFIX,
	.list	= ext4_xattr_trusted_list,
	.get	= ext4_xattr_trusted_get,
	.set	= ext4_xattr_trusted_set,
};
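The handler above only routes get/set of "trusted."-prefixed attributes into ext4's generic xattr code and gates listing on CAP_SYS_ADMIN. As a rough illustration of how that namespace is exercised from userspace, here is a minimal sketch using the standard xattr syscalls; the mount path and attribute name are made up, and the program would normally need CAP_SYS_ADMIN (e.g. run as root) for trusted.* attributes to be accessible at all.

/*
 * Illustrative userspace sketch (not part of the kernel source):
 * set and read back a trusted.* attribute on an ext4 file.
 * Path and attribute name are hypothetical.
 */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/ext4/testfile";	/* hypothetical path */
	const char *value = "example";
	char buf[64];
	ssize_t len;

	/* Ends up in ext4_xattr_trusted_set() via the handler above */
	if (setxattr(path, "trusted.example", value, strlen(value), 0) < 0) {
		perror("setxattr");
		return 1;
	}

	/* Retrieved through ext4_xattr_trusted_get() */
	len = getxattr(path, "trusted.example", buf, sizeof(buf) - 1);
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	buf[len] = '\0';
	printf("trusted.example = %s\n", buf);
	return 0;
}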
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * ahci.h - Common AHCI SATA definitions and declarations * * Maintained by: Tejun Heo <tj@kernel.org> * Please ALWAYS copy linux-ide@vger.kernel.org * on emails. * * Copyright 2004-2005 Red Hat, Inc.
* * libata documentation is available via 'make {ps|pdf}docs', * as Documentation/driver-api/libata.rst * * AHCI hardware documentation: * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf * http://www.intel.com/technology/serialata/pdf/rev1_1.pdf */ #ifndef _AHCI_H #define _AHCI_H #include <linux/pci.h> #include <linux/clk.h> #include <linux/libata.h> #include <linux/phy/phy.h> #include <linux/regulator/consumer.h> #include <linux/bits.h> /* Enclosure Management Control */ #define EM_CTRL_MSG_TYPE 0x000f0000 /* Enclosure Management LED Message Type */ #define EM_MSG_LED_HBA_PORT 0x0000000f #define EM_MSG_LED_PMP_SLOT 0x0000ff00 #define EM_MSG_LED_VALUE 0xffff0000 #define EM_MSG_LED_VALUE_ACTIVITY 0x00070000 #define EM_MSG_LED_VALUE_OFF 0xfff80000 #define EM_MSG_LED_VALUE_ON 0x00010000 enum { AHCI_MAX_PORTS = 32, AHCI_MAX_SG = 168, /* hardware max is 64K */ AHCI_DMA_BOUNDARY = 0xffffffff, AHCI_MAX_CMDS = 32, AHCI_CMD_SZ = 32, AHCI_CMD_SLOT_SZ = AHCI_MAX_CMDS * AHCI_CMD_SZ, AHCI_RX_FIS_SZ = 256, AHCI_CMD_TBL_CDB = 0x40, AHCI_CMD_TBL_HDR_SZ = 0x80, AHCI_CMD_TBL_SZ = AHCI_CMD_TBL_HDR_SZ + (AHCI_MAX_SG * 16), AHCI_CMD_TBL_AR_SZ = AHCI_CMD_TBL_SZ * AHCI_MAX_CMDS, AHCI_PORT_PRIV_DMA_SZ = AHCI_CMD_SLOT_SZ + AHCI_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ, AHCI_PORT_PRIV_FBS_DMA_SZ = AHCI_CMD_SLOT_SZ + AHCI_CMD_TBL_AR_SZ + (AHCI_RX_FIS_SZ * 16), AHCI_IRQ_ON_SG = BIT(31), AHCI_CMD_ATAPI = BIT(5), AHCI_CMD_WRITE = BIT(6), AHCI_CMD_PREFETCH = BIT(7), AHCI_CMD_RESET = BIT(8), AHCI_CMD_CLR_BUSY = BIT(10), RX_FIS_PIO_SETUP = 0x20, /* offset of PIO Setup FIS data */ RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */ RX_FIS_SDB = 0x58, /* offset of SDB FIS data */ RX_FIS_UNK = 0x60, /* offset of Unknown FIS data */ /* global controller registers */ HOST_CAP = 0x00, /* host capabilities */ HOST_CTL = 0x04, /* global host control */ HOST_IRQ_STAT = 0x08, /* interrupt status */ HOST_PORTS_IMPL = 0x0c, /* bitmap of implemented ports */ HOST_VERSION = 0x10, /* AHCI spec. 
version compliancy */ HOST_EM_LOC = 0x1c, /* Enclosure Management location */ HOST_EM_CTL = 0x20, /* Enclosure Management Control */ HOST_CAP2 = 0x24, /* host capabilities, extended */ /* HOST_CTL bits */ HOST_RESET = BIT(0), /* reset controller; self-clear */ HOST_IRQ_EN = BIT(1), /* global IRQ enable */ HOST_MRSM = BIT(2), /* MSI Revert to Single Message */ HOST_AHCI_EN = BIT(31), /* AHCI enabled */ /* HOST_CAP bits */ HOST_CAP_SXS = BIT(5), /* Supports External SATA */ HOST_CAP_EMS = BIT(6), /* Enclosure Management support */ HOST_CAP_CCC = BIT(7), /* Command Completion Coalescing */ HOST_CAP_PART = BIT(13), /* Partial state capable */ HOST_CAP_SSC = BIT(14), /* Slumber state capable */ HOST_CAP_PIO_MULTI = BIT(15), /* PIO multiple DRQ support */ HOST_CAP_FBS = BIT(16), /* FIS-based switching support */ HOST_CAP_PMP = BIT(17), /* Port Multiplier support */ HOST_CAP_ONLY = BIT(18), /* Supports AHCI mode only */ HOST_CAP_CLO = BIT(24), /* Command List Override support */ HOST_CAP_LED = BIT(25), /* Supports activity LED */ HOST_CAP_ALPM = BIT(26), /* Aggressive Link PM support */ HOST_CAP_SSS = BIT(27), /* Staggered Spin-up */ HOST_CAP_MPS = BIT(28), /* Mechanical presence switch */ HOST_CAP_SNTF = BIT(29), /* SNotification register */ HOST_CAP_NCQ = BIT(30), /* Native Command Queueing */ HOST_CAP_64 = BIT(31), /* PCI DAC (64-bit DMA) support */ /* HOST_CAP2 bits */ HOST_CAP2_BOH = BIT(0), /* BIOS/OS handoff supported */ HOST_CAP2_NVMHCI = BIT(1), /* NVMHCI supported */ HOST_CAP2_APST = BIT(2), /* Automatic partial to slumber */ HOST_CAP2_SDS = BIT(3), /* Support device sleep */ HOST_CAP2_SADM = BIT(4), /* Support aggressive DevSlp */ HOST_CAP2_DESO = BIT(5), /* DevSlp from slumber only */ /* registers for each SATA port */ PORT_LST_ADDR = 0x00, /* command list DMA addr */ PORT_LST_ADDR_HI = 0x04, /* command list DMA addr hi */ PORT_FIS_ADDR = 0x08, /* FIS rx buf addr */ PORT_FIS_ADDR_HI = 0x0c, /* FIS rx buf addr hi */ PORT_IRQ_STAT = 0x10, /* interrupt status */ PORT_IRQ_MASK = 0x14, /* interrupt enable/disable mask */ PORT_CMD = 0x18, /* port command */ PORT_TFDATA = 0x20, /* taskfile data */ PORT_SIG = 0x24, /* device TF signature */ PORT_CMD_ISSUE = 0x38, /* command issue */ PORT_SCR_STAT = 0x28, /* SATA phy register: SStatus */ PORT_SCR_CTL = 0x2c, /* SATA phy register: SControl */ PORT_SCR_ERR = 0x30, /* SATA phy register: SError */ PORT_SCR_ACT = 0x34, /* SATA phy register: SActive */ PORT_SCR_NTF = 0x3c, /* SATA phy register: SNotification */ PORT_FBS = 0x40, /* FIS-based Switching */ PORT_DEVSLP = 0x44, /* device sleep */ /* PORT_IRQ_{STAT,MASK} bits */ PORT_IRQ_COLD_PRES = BIT(31), /* cold presence detect */ PORT_IRQ_TF_ERR = BIT(30), /* task file error */ PORT_IRQ_HBUS_ERR = BIT(29), /* host bus fatal error */ PORT_IRQ_HBUS_DATA_ERR = BIT(28), /* host bus data error */ PORT_IRQ_IF_ERR = BIT(27), /* interface fatal error */ PORT_IRQ_IF_NONFATAL = BIT(26), /* interface non-fatal error */ PORT_IRQ_OVERFLOW = BIT(24), /* xfer exhausted available S/G */ PORT_IRQ_BAD_PMP = BIT(23), /* incorrect port multiplier */ PORT_IRQ_PHYRDY = BIT(22), /* PhyRdy changed */ PORT_IRQ_DMPS = BIT(7), /* mechanical presence status */ PORT_IRQ_CONNECT = BIT(6), /* port connect change status */ PORT_IRQ_SG_DONE = BIT(5), /* descriptor processed */ PORT_IRQ_UNK_FIS = BIT(4), /* unknown FIS rx'd */ PORT_IRQ_SDB_FIS = BIT(3), /* Set Device Bits FIS rx'd */ PORT_IRQ_DMAS_FIS = BIT(2), /* DMA Setup FIS rx'd */ PORT_IRQ_PIOS_FIS = BIT(1), /* PIO Setup FIS rx'd */ PORT_IRQ_D2H_REG_FIS = BIT(0), /* D2H Register 
FIS rx'd */ PORT_IRQ_FREEZE = PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP, PORT_IRQ_ERROR = PORT_IRQ_FREEZE | PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR, DEF_PORT_IRQ = PORT_IRQ_ERROR | PORT_IRQ_SG_DONE | PORT_IRQ_SDB_FIS | PORT_IRQ_DMAS_FIS | PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS, /* PORT_CMD bits */ PORT_CMD_ASP = BIT(27), /* Aggressive Slumber/Partial */ PORT_CMD_ALPE = BIT(26), /* Aggressive Link PM enable */ PORT_CMD_ATAPI = BIT(24), /* Device is ATAPI */ PORT_CMD_FBSCP = BIT(22), /* FBS Capable Port */ PORT_CMD_ESP = BIT(21), /* External Sata Port */ PORT_CMD_CPD = BIT(20), /* Cold Presence Detection */ PORT_CMD_MPSP = BIT(19), /* Mechanical Presence Switch */ PORT_CMD_HPCP = BIT(18), /* HotPlug Capable Port */ PORT_CMD_PMP = BIT(17), /* PMP attached */ PORT_CMD_LIST_ON = BIT(15), /* cmd list DMA engine running */ PORT_CMD_FIS_ON = BIT(14), /* FIS DMA engine running */ PORT_CMD_FIS_RX = BIT(4), /* Enable FIS receive DMA engine */ PORT_CMD_CLO = BIT(3), /* Command list override */ PORT_CMD_POWER_ON = BIT(2), /* Power up device */ PORT_CMD_SPIN_UP = BIT(1), /* Spin up device */ PORT_CMD_START = BIT(0), /* Enable port DMA engine */ PORT_CMD_ICC_MASK = (0xfu << 28), /* i/f ICC state mask */ PORT_CMD_ICC_ACTIVE = (0x1u << 28), /* Put i/f in active state */ PORT_CMD_ICC_PARTIAL = (0x2u << 28), /* Put i/f in partial state */ PORT_CMD_ICC_SLUMBER = (0x6u << 28), /* Put i/f in slumber state */ /* PORT_CMD capabilities mask */ PORT_CMD_CAP = PORT_CMD_HPCP | PORT_CMD_MPSP | PORT_CMD_CPD | PORT_CMD_ESP | PORT_CMD_FBSCP, /* PORT_FBS bits */ PORT_FBS_DWE_OFFSET = 16, /* FBS device with error offset */ PORT_FBS_ADO_OFFSET = 12, /* FBS active dev optimization offset */ PORT_FBS_DEV_OFFSET = 8, /* FBS device to issue offset */ PORT_FBS_DEV_MASK = (0xf << PORT_FBS_DEV_OFFSET), /* FBS.DEV */ PORT_FBS_SDE = BIT(2), /* FBS single device error */ PORT_FBS_DEC = BIT(1), /* FBS device error clear */ PORT_FBS_EN = BIT(0), /* Enable FBS */ /* PORT_DEVSLP bits */ PORT_DEVSLP_DM_OFFSET = 25, /* DITO multiplier offset */ PORT_DEVSLP_DM_MASK = (0xf << 25), /* DITO multiplier mask */ PORT_DEVSLP_DITO_OFFSET = 15, /* DITO offset */ PORT_DEVSLP_MDAT_OFFSET = 10, /* Minimum assertion time */ PORT_DEVSLP_DETO_OFFSET = 2, /* DevSlp exit timeout */ PORT_DEVSLP_DSP = BIT(1), /* DevSlp present */ PORT_DEVSLP_ADSE = BIT(0), /* Aggressive DevSlp enable */ /* hpriv->flags bits */ #define AHCI_HFLAGS(flags) .private_data = (void *)(flags) AHCI_HFLAG_NO_NCQ = BIT(0), AHCI_HFLAG_IGN_IRQ_IF_ERR = BIT(1), /* ignore IRQ_IF_ERR */ AHCI_HFLAG_IGN_SERR_INTERNAL = BIT(2), /* ignore SERR_INTERNAL */ AHCI_HFLAG_32BIT_ONLY = BIT(3), /* force 32bit */ AHCI_HFLAG_MV_PATA = BIT(4), /* PATA port */ AHCI_HFLAG_NO_MSI = BIT(5), /* no PCI MSI */ AHCI_HFLAG_NO_PMP = BIT(6), /* no PMP */ AHCI_HFLAG_SECT255 = BIT(8), /* max 255 sectors */ AHCI_HFLAG_YES_NCQ = BIT(9), /* force NCQ cap on */ AHCI_HFLAG_NO_SUSPEND = BIT(10), /* don't suspend */ AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = BIT(11), /* treat SRST timeout as link offline */ AHCI_HFLAG_NO_SNTF = BIT(12), /* no sntf */ AHCI_HFLAG_NO_FPDMA_AA = BIT(13), /* no FPDMA AA */ AHCI_HFLAG_YES_FBS = BIT(14), /* force FBS cap on */ AHCI_HFLAG_DELAY_ENGINE = BIT(15), /* do not start engine on port start (wait until error-handling stage) */ AHCI_HFLAG_NO_DEVSLP = BIT(17), /* no device sleep */ AHCI_HFLAG_NO_FBS = BIT(18), /* no FBS */ #ifdef CONFIG_PCI_MSI AHCI_HFLAG_MULTI_MSI = BIT(20), /* per-port MSI(-X) */ #else /* compile out MSI 
infrastructure */ AHCI_HFLAG_MULTI_MSI = 0, #endif AHCI_HFLAG_WAKE_BEFORE_STOP = BIT(22), /* wake before DMA stop */ AHCI_HFLAG_YES_ALPM = BIT(23), /* force ALPM cap on */ AHCI_HFLAG_NO_WRITE_TO_RO = BIT(24), /* don't write to read only registers */ AHCI_HFLAG_SUSPEND_PHYS = BIT(25), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(26), /* SXS not supported */ AHCI_HFLAG_43BIT_ONLY = BIT(27), /* 43bit DMA addr limit */ AHCI_HFLAG_INTEL_PCS_QUIRK = BIT(28), /* apply Intel PCS quirk */ /* ap->flags bits */ AHCI_FLAG_COMMON = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_ACPI_SATA | ATA_FLAG_AN, ICH_MAP = 0x90, /* ICH MAP register */ PCS_6 = 0x92, /* 6 port PCS */ PCS_7 = 0x94, /* 7+ port PCS (Denverton) */ /* em constants */ EM_MAX_SLOTS = SATA_PMP_MAX_PORTS, EM_MAX_RETRY = 5, /* em_ctl bits */ EM_CTL_RST = BIT(9), /* Reset */ EM_CTL_TM = BIT(8), /* Transmit Message */ EM_CTL_MR = BIT(0), /* Message Received */ EM_CTL_ALHD = BIT(26), /* Activity LED */ EM_CTL_XMT = BIT(25), /* Transmit Only */ EM_CTL_SMB = BIT(24), /* Single Message Buffer */ EM_CTL_SGPIO = BIT(19), /* SGPIO messages supported */ EM_CTL_SES = BIT(18), /* SES-2 messages supported */ EM_CTL_SAFTE = BIT(17), /* SAF-TE messages supported */ EM_CTL_LED = BIT(16), /* LED messages supported */ /* em message type */ EM_MSG_TYPE_LED = BIT(0), /* LED */ EM_MSG_TYPE_SAFTE = BIT(1), /* SAF-TE */ EM_MSG_TYPE_SES2 = BIT(2), /* SES-2 */ EM_MSG_TYPE_SGPIO = BIT(3), /* SGPIO */ }; struct ahci_cmd_hdr { __le32 opts; __le32 status; __le32 tbl_addr; __le32 tbl_addr_hi; __le32 reserved[4]; }; struct ahci_sg { __le32 addr; __le32 addr_hi; __le32 reserved; __le32 flags_size; }; struct ahci_em_priv { enum sw_activity blink_policy; struct timer_list timer; unsigned long saved_activity; unsigned long activity; unsigned long led_state; struct ata_link *link; }; struct ahci_port_priv { struct ata_link *active_link; struct ahci_cmd_hdr *cmd_slot; dma_addr_t cmd_slot_dma; void *cmd_tbl; dma_addr_t cmd_tbl_dma; void *rx_fis; dma_addr_t rx_fis_dma; /* for NCQ spurious interrupt analysis */ unsigned int ncq_saw_d2h:1; unsigned int ncq_saw_dmas:1; unsigned int ncq_saw_sdb:1; spinlock_t lock; /* protects parent ata_port */ u32 intr_mask; /* interrupts to enable */ bool fbs_supported; /* set iff FBS is supported */ bool fbs_enabled; /* set iff FBS is enabled */ int fbs_last_dev; /* save FBS.DEV of last FIS */ /* enclosure management info per PM slot */ struct ahci_em_priv em_priv[EM_MAX_SLOTS]; char *irq_desc; /* desc in /proc/interrupts */ }; struct ahci_host_priv { /* Input fields */ unsigned int flags; /* AHCI_HFLAG_* */ u32 mask_port_map; /* mask out particular bits */ void __iomem * mmio; /* bus-independent mem map */ u32 cap; /* cap to use */ u32 cap2; /* cap2 to use */ u32 version; /* cached version */ u32 port_map; /* port map to use */ u32 saved_cap; /* saved initial cap */ u32 saved_cap2; /* saved initial cap2 */ u32 saved_port_map; /* saved initial port_map */ u32 saved_port_cap[AHCI_MAX_PORTS]; /* saved port_cap */ u32 em_loc; /* enclosure management location */ u32 em_buf_sz; /* EM buffer size in byte */ u32 em_msg_type; /* EM message type */ u32 remapped_nvme; /* NVMe remapped device count */ bool got_runtime_pm; /* Did we do pm_runtime_get? 
*/ unsigned int n_clks; struct clk_bulk_data *clks; /* Optional */ unsigned int f_rsts; struct reset_control *rsts; /* Optional */ struct regulator **target_pwrs; /* Optional */ struct regulator *ahci_regulator;/* Optional */ struct regulator *phy_regulator;/* Optional */ /* * If platform uses PHYs. There is a 1:1 relation between the port number and * the PHY position in this array. */ struct phy **phys; unsigned nports; /* Number of ports */ void *plat_data; /* Other platform data */ unsigned int irq; /* interrupt line */ /* * Optional ahci_start_engine override, if not set this gets set to the * default ahci_start_engine during ahci_save_initial_config, this can * be overridden anytime before the host is activated. */ void (*start_engine)(struct ata_port *ap); /* * Optional ahci_stop_engine override, if not set this gets set to the * default ahci_stop_engine during ahci_save_initial_config, this can * be overridden anytime before the host is activated. */ int (*stop_engine)(struct ata_port *ap); irqreturn_t (*irq_handler)(int irq, void *dev_instance); /* only required for per-port MSI(-X) support */ int (*get_irq_vector)(struct ata_host *host, int port); }; extern int ahci_ignore_sss; extern const struct attribute_group *ahci_shost_groups[]; extern const struct attribute_group *ahci_sdev_groups[]; /* * This must be instantiated by the edge drivers. Read the comments * for ATA_BASE_SHT */ #define AHCI_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .can_queue = AHCI_MAX_CMDS, \ .sg_tablesize = AHCI_MAX_SG, \ .dma_boundary = AHCI_DMA_BOUNDARY, \ .shost_groups = ahci_shost_groups, \ .sdev_groups = ahci_sdev_groups, \ .change_queue_depth = ata_scsi_change_queue_depth, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .slave_configure = ata_scsi_slave_config extern struct ata_port_operations ahci_ops; extern struct ata_port_operations ahci_platform_ops; extern struct ata_port_operations ahci_pmp_retry_srst_ops; unsigned int ahci_dev_classify(struct ata_port *ap); void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, u32 opts); void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv); void ahci_init_controller(struct ata_host *host); int ahci_reset_controller(struct ata_host *host); int ahci_do_softreset(struct ata_link *link, unsigned int *class, int pmp, unsigned long deadline, int (*check_ready)(struct ata_link *link)); int ahci_do_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline, bool *online); unsigned int ahci_qc_issue(struct ata_queued_cmd *qc); int ahci_stop_engine(struct ata_port *ap); void ahci_start_fis_rx(struct ata_port *ap); void ahci_start_engine(struct ata_port *ap); int ahci_check_ready(struct ata_link *link); int ahci_kick_engine(struct ata_port *ap); int ahci_port_resume(struct ata_port *ap); void ahci_set_em_messages(struct ahci_host_priv *hpriv, struct ata_port_info *pi); int ahci_reset_em(struct ata_host *host); void ahci_print_info(struct ata_host *host, const char *scc_s); int ahci_host_activate(struct ata_host *host, const struct scsi_host_template *sht); void ahci_error_handler(struct ata_port *ap); u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked); static inline void __iomem *__ahci_port_base(struct ahci_host_priv *hpriv, unsigned int port_no) { void __iomem *mmio = hpriv->mmio; return mmio + 0x100 + (port_no * 0x80); } static inline void __iomem *ahci_port_base(struct ata_port *ap) { struct ahci_host_priv *hpriv = ap->host->private_data; return __ahci_port_base(hpriv, ap->port_no); } static inline 
int ahci_nr_ports(u32 cap) { return (cap & 0x1f) + 1; } #endif /* _AHCI_H */
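Two small helpers at the bottom of this header encode the register layout: ahci_nr_ports() treats the low five bits of HOST_CAP as a zero-based port count, and __ahci_port_base() places each port's register block at offset 0x100 + port * 0x80 from the ABAR mapping. The following standalone sketch mirrors that arithmetic and a couple of the HOST_CAP_* bits; the constants are re-declared here only so the example is self-contained, and the sample cap value is invented.

/*
 * Illustrative sketch (not part of the kernel source): decoding a raw
 * HOST_CAP value with the same bit layout defined above.
 */
#include <stdio.h>
#include <stdint.h>

#define EX_HOST_CAP_NCQ	(1u << 30)	/* mirrors HOST_CAP_NCQ */
#define EX_HOST_CAP_64	(1u << 31)	/* mirrors HOST_CAP_64 */

/* Same arithmetic as ahci_nr_ports(): CAP.NP is a zero-based port count */
static unsigned int nr_ports(uint32_t cap)
{
	return (cap & 0x1f) + 1;
}

/* Same arithmetic as __ahci_port_base(): per-port registers start at
 * offset 0x100 and each port occupies an 0x80-byte window. */
static uint32_t port_base_offset(unsigned int port_no)
{
	return 0x100 + port_no * 0x80;
}

int main(void)
{
	uint32_t cap = 0xc734ff05;	/* hypothetical HOST_CAP readout */

	printf("ports: %u\n", nr_ports(cap));
	printf("NCQ:   %s\n", (cap & EX_HOST_CAP_NCQ) ? "yes" : "no");
	printf("64bit: %s\n", (cap & EX_HOST_CAP_64) ? "yes" : "no");
	printf("port 2 regs at ABAR offset 0x%x\n",
	       (unsigned int)port_base_offset(2));
	return 0;
}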
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013
 * Phillip Lougher <phillip@squashfs.org.uk>
 */

#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/bio.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"

/*
 * This file implements single-threaded decompression in the
 * decompressor framework
 */

struct squashfs_stream {
	void		*stream;
	struct mutex	mutex;
};

static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
						void *comp_opts)
{
	struct squashfs_stream *stream;
	int err = -ENOMEM;

	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
	if (stream == NULL)
		goto out;

	stream->stream = msblk->decompressor->init(msblk, comp_opts);
	if (IS_ERR(stream->stream)) {
		err = PTR_ERR(stream->stream);
		goto out;
	}

	kfree(comp_opts);
	mutex_init(&stream->mutex);

	return stream;

out:
	kfree(stream);
	return ERR_PTR(err);
}

static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
	struct squashfs_stream *stream = msblk->stream;

	if (stream) {
		msblk->decompressor->free(stream->stream);
		kfree(stream);
	}
}

static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
			int offset, int length,
			struct squashfs_page_actor *output)
{
	int res;
	struct squashfs_stream *stream = msblk->stream;

	mutex_lock(&stream->mutex);
	res = msblk->decompressor->decompress(msblk, stream->stream, bio,
		offset, length, output);
	mutex_unlock(&stream->mutex);

	if (res < 0)
		ERROR("%s decompression failed, data probably corrupt\n",
			msblk->decompressor->name);

	return res;
}

static int squashfs_max_decompressors(void)
{
	return 1;
}

const struct squashfs_decompressor_thread_ops squashfs_decompressor_single = {
	.create = squashfs_decompressor_create,
	.destroy = squashfs_decompressor_destroy,
	.decompress = squashfs_decompress,
	.max_decompressors = squashfs_max_decompressors,
};
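Everything above funnels decompression through a single stream protected by one mutex, which is why squashfs_max_decompressors() returns 1. Below is a minimal userspace analogue of that pattern, assuming pthreads and a made-up fake_stream type standing in for msblk->decompressor; it only illustrates the create/lock/decompress/unlock shape, not the real decompressor interface.

/*
 * Illustrative userspace sketch (not part of the kernel source):
 * a single decompression stream serialized by one mutex.
 */
#include <pthread.h>
#include <stdlib.h>

struct fake_stream { int state; };	/* hypothetical stand-in for a decompressor stream */

struct single_stream {
	struct fake_stream *stream;
	pthread_mutex_t mutex;
};

static struct single_stream *single_create(void)
{
	struct single_stream *s = malloc(sizeof(*s));

	if (!s)
		return NULL;
	s->stream = calloc(1, sizeof(*s->stream));	/* plays the role of ->init() */
	if (!s->stream) {
		free(s);
		return NULL;
	}
	pthread_mutex_init(&s->mutex, NULL);
	return s;
}

/* Every caller, regardless of thread, is serialized on the one stream. */
static int single_decompress(struct single_stream *s, const void *in, void *out)
{
	int res;

	(void)in;
	(void)out;
	pthread_mutex_lock(&s->mutex);
	res = 0;	/* the real code runs ->decompress() on s->stream here */
	pthread_mutex_unlock(&s->mutex);
	return res;
}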
// SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "bbpos.h" #include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" #include "errcode.h" #include "error.h" #include "journal.h" #include "trace.h" #include <linux/prefetch.h> #include <linux/sched/mm.h> const char * const bch2_btree_node_flags[] = { #define x(f) #f, BTREE_FLAGS() #undef x NULL }; void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; if (!c->btree_roots_known[0].b) reserve += 8; for (i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->b) reserve += min_t(unsigned, 1, r->b->c.level) * 8; } c->btree_cache.reserve = reserve; } static inline unsigned btree_cache_can_free(struct btree_cache *bc) { return max_t(int, 0, bc->used - bc->reserve); } static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) { if (b->c.lock.readers) list_move(&b->list, &bc->freed_pcpu); else list_move(&b->list, &bc->freed_nonpcpu); } static void btree_node_data_free(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; EBUG_ON(btree_node_write_in_flight(b)); clear_btree_node_just_written(b); kvfree(b->data); b->data = NULL; #ifdef __KERNEL__ kvfree(b->aux_data); #else munmap(b->aux_data, btree_aux_data_bytes(b)); #endif b->aux_data = NULL; bc->used--; btree_node_to_freedlist(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { const struct btree *b = obj; const u64 *v = arg->key; return b->hash_val == *v ?
0 : 1; } static const struct rhashtable_params bch_btree_cache_params = { .head_offset = offsetof(struct btree, hash), .key_offset = offsetof(struct btree, hash_val), .key_len = sizeof(u64), .obj_cmpfn = bch2_btree_cache_cmp_fn, }; static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) { BUG_ON(b->data || b->aux_data); b->data = kvmalloc(btree_buf_bytes(b), gfp); if (!b->data) return -BCH_ERR_ENOMEM_btree_node_mem_alloc; #ifdef __KERNEL__ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); #else b->aux_data = mmap(NULL, btree_aux_data_bytes(b), PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (b->aux_data == MAP_FAILED) b->aux_data = NULL; #endif if (!b->aux_data) { kvfree(b->data); b->data = NULL; return -BCH_ERR_ENOMEM_btree_node_mem_alloc; } return 0; } static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) { struct btree *b; b = kzalloc(sizeof(struct btree), gfp); if (!b) return NULL; bkey_btree_ptr_init(&b->key); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); b->byte_order = ilog2(c->opts.btree_node_size); return b; } struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b; b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) return NULL; if (btree_node_data_alloc(c, b, GFP_KERNEL)) { kfree(b); return NULL; } bch2_btree_lock_init(&b->c, 0); bc->used++; list_add(&b->list, &bc->freeable); return b; } /* Btree in memory cache - hash table */ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) { int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); BUG_ON(ret); /* Cause future lookups for this node to fail: */ b->hash_val = 0; } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) { BUG_ON(b->hash_val); b->hash_val = btree_ptr_hash_val(&b->key); return rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params); } int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, unsigned level, enum btree_id id) { int ret; b->c.level = level; b->c.btree_id = id; mutex_lock(&bc->lock); ret = __bch2_btree_node_hash_insert(bc, b); if (!ret) list_add_tail(&b->list, &bc->live); mutex_unlock(&bc->lock); return ret; } __flatten static inline struct btree *btree_cache_find(struct btree_cache *bc, const struct bkey_i *k) { u64 v = btree_ptr_hash_val(k); return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); } /* * this version is for btree nodes that have already been freed (we're not * reaping a real btree node) */ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) { struct btree_cache *bc = &c->btree_cache; int ret = 0; lockdep_assert_held(&bc->lock); struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p); u64 mask = b->c.level ? 
bc->pinned_nodes_interior_mask : bc->pinned_nodes_leaf_mask; if ((mask & BIT_ULL(b->c.btree_id)) && bbpos_cmp(bc->pinned_nodes_start, pos) < 0 && bbpos_cmp(bc->pinned_nodes_end, pos) >= 0) return -BCH_ERR_ENOMEM_btree_node_reclaim; wait_on_io: if (b->flags & ((1U << BTREE_NODE_dirty)| (1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_write_in_flight))) { if (!flush) return -BCH_ERR_ENOMEM_btree_node_reclaim; /* XXX: waiting on IO with btree cache lock held */ bch2_btree_node_wait_on_read(b); bch2_btree_node_wait_on_write(b); } if (!six_trylock_intent(&b->c.lock)) return -BCH_ERR_ENOMEM_btree_node_reclaim; if (!six_trylock_write(&b->c.lock)) goto out_unlock_intent; /* recheck under lock */ if (b->flags & ((1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_write_in_flight))) { if (!flush) goto out_unlock; six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; } if (btree_node_noevict(b) || btree_node_write_blocked(b) || btree_node_will_make_reachable(b)) goto out_unlock; if (btree_node_dirty(b)) { if (!flush) goto out_unlock; /* * Using the underscore version because we don't want to compact * bsets after the write, since this node is about to be evicted * - unless btree verify mode is enabled, since it runs out of * the post write cleanup: */ if (bch2_verify_btree_ondisk) bch2_btree_node_write(c, b, SIX_LOCK_intent, BTREE_WRITE_cache_reclaim); else __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; } out: if (b->hash_val && !ret) trace_and_count(c, btree_cache_reap, c, b); return ret; out_unlock: six_unlock_write(&b->c.lock); out_unlock_intent: six_unlock_intent(&b->c.lock); ret = -BCH_ERR_ENOMEM_btree_node_reclaim; goto out; } static int btree_node_reclaim(struct bch_fs *c, struct btree *b) { return __btree_node_reclaim(c, b, false); } static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) { return __btree_node_reclaim(c, b, true); } static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct bch_fs *c = shrink->private_data; struct btree_cache *bc = &c->btree_cache; struct btree *b, *t; unsigned long nr = sc->nr_to_scan; unsigned long can_free = 0; unsigned long freed = 0; unsigned long touched = 0; unsigned i, flags; unsigned long ret = SHRINK_STOP; bool trigger_writes = atomic_read(&bc->dirty) + nr >= bc->used * 3 / 4; if (bch2_btree_shrinker_disabled) return SHRINK_STOP; mutex_lock(&bc->lock); flags = memalloc_nofs_save(); /* * It's _really_ critical that we don't free too many btree nodes - we * have to always leave ourselves a reserve. 
The reserve is how we * guarantee that allocating memory for a new btree node can always * succeed, so that inserting keys into the btree can always succeed and * IO can always make forward progress: */ can_free = btree_cache_can_free(bc); nr = min_t(unsigned long, nr, can_free); i = 0; list_for_each_entry_safe(b, t, &bc->freeable, list) { /* * Leave a few nodes on the freeable list, so that a btree split * won't have to hit the system allocator: */ if (++i <= 3) continue; touched++; if (touched >= nr) goto out; if (!btree_node_reclaim(c, b)) { btree_node_data_free(c, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); freed++; } } restart: list_for_each_entry_safe(b, t, &bc->live, list) { touched++; if (btree_node_accessed(b)) { clear_btree_node_accessed(b); } else if (!btree_node_reclaim(c, b)) { freed++; btree_node_data_free(c, b); bch2_btree_node_hash_remove(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); if (freed == nr) goto out_rotate; } else if (trigger_writes && btree_node_dirty(b) && !btree_node_will_make_reachable(b) && !btree_node_write_blocked(b) && six_trylock_read(&b->c.lock)) { list_move(&bc->live, &b->list); mutex_unlock(&bc->lock); __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); six_unlock_read(&b->c.lock); if (touched >= nr) goto out_nounlock; mutex_lock(&bc->lock); goto restart; } if (touched >= nr) break; } out_rotate: if (&t->list != &bc->live) list_move_tail(&bc->live, &t->list); out: mutex_unlock(&bc->lock); out_nounlock: ret = freed; memalloc_nofs_restore(flags); trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret); return ret; } static unsigned long bch2_btree_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct bch_fs *c = shrink->private_data; struct btree_cache *bc = &c->btree_cache; if (bch2_btree_shrinker_disabled) return 0; return btree_cache_can_free(bc); } void bch2_fs_btree_cache_exit(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b; unsigned i, flags; shrinker_free(bc->shrink); /* vfree() can allocate memory: */ flags = memalloc_nofs_save(); mutex_lock(&bc->lock); if (c->verify_data) list_move(&c->verify_data->list, &bc->live); kvfree(c->verify_ondisk); for (i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->b) list_add(&r->b->list, &bc->live); } list_splice(&bc->freeable, &bc->live); while (!list_empty(&bc->live)) { b = list_first_entry(&bc->live, struct btree, list); BUG_ON(btree_node_read_in_flight(b) || btree_node_write_in_flight(b)); btree_node_data_free(c, b); } BUG_ON(!bch2_journal_error(&c->journal) && atomic_read(&c->btree_cache.dirty)); list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); while (!list_empty(&bc->freed_nonpcpu)) { b = list_first_entry(&bc->freed_nonpcpu, struct btree, list); list_del(&b->list); six_lock_exit(&b->c.lock); kfree(b); } mutex_unlock(&bc->lock); memalloc_nofs_restore(flags); if (bc->table_init_done) rhashtable_destroy(&bc->table); } int bch2_fs_btree_cache_init(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct shrinker *shrink; unsigned i; int ret = 0; ret = rhashtable_init(&bc->table, &bch_btree_cache_params); if (ret) goto err; bc->table_init_done = true; bch2_recalc_btree_reserve(c); for (i = 0; i < bc->reserve; i++) if (!__bch2_btree_node_mem_alloc(c)) goto err; list_splice_init(&bc->live, &bc->freeable); mutex_init(&c->verify_lock); shrink = shrinker_alloc(0, "%s-btree_cache", c->name); if (!shrink) goto err; bc->shrink = shrink; 
shrink->count_objects = bch2_btree_cache_count; shrink->scan_objects = bch2_btree_cache_scan; shrink->seeks = 4; shrink->private_data = c; shrinker_register(shrink); return 0; err: return -BCH_ERR_ENOMEM_fs_btree_cache_init; } void bch2_fs_btree_cache_init_early(struct btree_cache *bc) { mutex_init(&bc->lock); INIT_LIST_HEAD(&bc->live); INIT_LIST_HEAD(&bc->freeable); INIT_LIST_HEAD(&bc->freed_pcpu); INIT_LIST_HEAD(&bc->freed_nonpcpu); } /* * We can only have one thread cannibalizing other cached btree nodes at a time, * or we'll deadlock. We use an open coded mutex to ensure that, which a * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; if (bc->alloc_lock == current) { trace_and_count(c, btree_cache_cannibalize_unlock, trans); bc->alloc_lock = NULL; closure_wake_up(&bc->alloc_wait); } } int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct task_struct *old; old = cmpxchg(&bc->alloc_lock, NULL, current); if (old == NULL || old == current) goto success; if (!cl) { trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; } closure_wait(&bc->alloc_wait, cl); /* Try again, after adding ourselves to waitlist */ old = cmpxchg(&bc->alloc_lock, NULL, current); if (old == NULL || old == current) { /* We raced */ closure_wake_up(&bc->alloc_wait); goto success; } trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); return -BCH_ERR_btree_cache_cannibalize_lock_blocked; success: trace_and_count(c, btree_cache_cannibalize_lock, trans); return 0; } static struct btree *btree_node_cannibalize(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b; list_for_each_entry_reverse(b, &bc->live, list) if (!btree_node_reclaim(c, b)) return b; while (1) { list_for_each_entry_reverse(b, &bc->live, list) if (!btree_node_write_and_reclaim(c, b)) return b; /* * Rare case: all nodes were intent-locked. * Just busy-wait. */ WARN_ONCE(1, "btree cache cannibalize failed\n"); cond_resched(); } } struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct list_head *freed = pcpu_read_locks ? &bc->freed_pcpu : &bc->freed_nonpcpu; struct btree *b, *b2; u64 start_time = local_clock(); unsigned flags; flags = memalloc_nofs_save(); mutex_lock(&bc->lock); /* * We never free struct btree itself, just the memory that holds the on * disk node. Check the freed list before allocating a new one: */ list_for_each_entry(b, freed, list) if (!btree_node_reclaim(c, b)) { list_del_init(&b->list); goto got_node; } b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); if (!b) { mutex_unlock(&bc->lock); bch2_trans_unlock(trans); b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) goto err; mutex_lock(&bc->lock); } bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); got_node: /* * btree_free() doesn't free memory; it sticks the node on the end of * the list. 
Check if there's any freed nodes there: */ list_for_each_entry(b2, &bc->freeable, list) if (!btree_node_reclaim(c, b2)) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); btree_node_to_freedlist(bc, b2); six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); goto got_mem; } mutex_unlock(&bc->lock); if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) { bch2_trans_unlock(trans); if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) goto err; } mutex_lock(&bc->lock); bc->used++; got_mem: mutex_unlock(&bc->lock); BUG_ON(btree_node_hashed(b)); BUG_ON(btree_node_dirty(b)); BUG_ON(btree_node_write_in_flight(b)); out: b->flags = 0; b->written = 0; b->nsets = 0; b->sib_u64s[0] = 0; b->sib_u64s[1] = 0; b->whiteout_u64s = 0; bch2_btree_keys_init(b); set_btree_node_accessed(b); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], start_time); memalloc_nofs_restore(flags); return b; err: mutex_lock(&bc->lock); /* Try to cannibalize another cached btree node: */ if (bc->alloc_lock == current) { b2 = btree_node_cannibalize(c); clear_btree_node_just_written(b2); bch2_btree_node_hash_remove(bc, b2); if (b) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); btree_node_to_freedlist(bc, b2); six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); } else { b = b2; list_del_init(&b->list); } mutex_unlock(&bc->lock); trace_and_count(c, btree_cache_cannibalize, trans); goto out; } mutex_unlock(&bc->lock); memalloc_nofs_restore(flags); return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc); } /* Slowpath, don't want it inlined into btree_iter_traverse() */ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level, enum six_lock_type lock_type, bool sync) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; if (unlikely(level >= BTREE_MAX_DEPTH)) { int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u", level, BTREE_MAX_DEPTH); return ERR_PTR(ret); } if (unlikely(!bkey_is_btree_ptr(&k->k))) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); printbuf_exit(&buf); return ERR_PTR(ret); } if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); printbuf_exit(&buf); return ERR_PTR(ret); } /* * Parent node must be locked, else we could read in a btree node that's * been freed: */ if (path && !bch2_btree_node_relock(trans, path, level + 1)) { trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); } b = bch2_btree_node_mem_alloc(trans, level != 0); if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) { if (!path) return b; trans->memory_allocation_failure = true; trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail)); } if (IS_ERR(b)) return b; bkey_copy(&b->key, k); if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { /* raced with another fill: */ /* mark as unhashed... 
*/ b->hash_val = 0; mutex_lock(&bc->lock); list_add(&b->list, &bc->freeable); mutex_unlock(&bc->lock); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); return NULL; } set_btree_node_read_in_flight(b); six_unlock_write(&b->c.lock); if (path) { u32 seq = six_lock_seq(&b->c.lock); /* Unlock before doing IO: */ six_unlock_intent(&b->c.lock); bch2_trans_unlock_noassert(trans); bch2_btree_node_read(trans, b, sync); if (!sync) return NULL; if (!six_relock_type(&b->c.lock, lock_type, seq)) b = NULL; } else { bch2_btree_node_read(trans, b, sync); if (lock_type == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } return b; } static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { struct printbuf buf = PRINTBUF; if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) return; prt_printf(&buf, "btree node header doesn't match ptr\n" "btree %s level %u\n" "ptr: ", bch2_btree_id_str(b->c.btree_id), b->c.level); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_printf(&buf, "\nheader: btree %s level %llu\n" "min ", bch2_btree_id_str(BTREE_NODE_ID(b->data)), BTREE_NODE_LEVEL(b->data)); bch2_bpos_to_text(&buf, b->data->min_key); prt_printf(&buf, "\nmax "); bch2_bpos_to_text(&buf, b->data->max_key); bch2_fs_topology_error(c, "%s", buf.buf); printbuf_exit(&buf); } static inline void btree_check_header(struct bch_fs *c, struct btree *b) { if (b->c.btree_id != BTREE_NODE_ID(b->data) || b->c.level != BTREE_NODE_LEVEL(b->data) || !bpos_eq(b->data->max_key, b->key.k.p) || (b->key.k.type == KEY_TYPE_btree_ptr_v2 && !bpos_eq(b->data->min_key, bkey_i_to_btree_ptr_v2(&b->key)->v.min_key))) btree_bad_header(c, b); } static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; bool need_relock = false; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { /* * We must have the parent locked to call bch2_btree_node_fill(), * else we could read in a btree node from disk that's been * freed: */ b = bch2_btree_node_fill(trans, path, k, path->btree_id, level, lock_type, true); need_relock = true; /* We raced and found the btree node in the cache */ if (!b) goto retry; if (IS_ERR(b)) return b; } else { if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ERR_PTR(ret); BUG_ON(ret); if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.level != level || race_fault())) { six_unlock_type(&b->c.lock, lock_type); if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); } /* avoid atomic set bit if it's not needed: */ if (!btree_node_accessed(b)) set_btree_node_accessed(b); } if (unlikely(btree_node_read_in_flight(b))) { u32 seq = six_lock_seq(&b->c.lock); six_unlock_type(&b->c.lock, lock_type); bch2_trans_unlock(trans); need_relock = true; bch2_btree_node_wait_on_read(b); /* * should_be_locked is not set on this path yet, so we need to * relock it specifically: */ if (!six_relock_type(&b->c.lock, lock_type, seq)) goto retry; } if (unlikely(need_relock)) { ret = 
bch2_trans_relock(trans) ?: bch2_btree_path_relock_intent(trans, path); if (ret) { six_unlock_type(&b->c.lock, lock_type); return ERR_PTR(ret); } } prefetch(b->aux_data); for_each_bset(b, t) { void *p = (u64 *) b->aux_data + t->aux_data_offset; prefetch(p + L1_CACHE_BYTES * 0); prefetch(p + L1_CACHE_BYTES * 1); prefetch(p + L1_CACHE_BYTES * 2); } if (unlikely(btree_node_read_error(b))) { six_unlock_type(&b->c.lock, lock_type); return ERR_PTR(-BCH_ERR_btree_node_read_error); } EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); return b; } /** * bch2_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * * @trans: btree transaction object * @path: btree_path being traversed * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2) * @level: level of btree node being looked up (0 == leaf node) * @lock_type: SIX_LOCK_read or SIX_LOCK_intent * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek()) * * The btree node will have either a read or a write lock held, depending on * the @write parameter. * * Returns: btree node or ERR_PTR() */ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree *b; struct bset_tree *t; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); b = btree_node_mem_ptr(k); /* * Check b->hash_val _before_ calling btree_node_lock() - this might not * be the node we want anymore, and trying to lock the wrong node could * cause an unneccessary transaction restart: */ if (unlikely(!c->opts.btree_node_mem_ptr_optimization || !b || b->hash_val != btree_ptr_hash_val(k))) return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip); if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ERR_PTR(ret); BUG_ON(ret); if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.level != level || race_fault())) { six_unlock_type(&b->c.lock, lock_type); if (bch2_btree_node_relock(trans, path, level + 1)) return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip); trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); } if (unlikely(btree_node_read_in_flight(b))) { six_unlock_type(&b->c.lock, lock_type); return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip); } prefetch(b->aux_data); for_each_bset(b, t) { void *p = (u64 *) b->aux_data + t->aux_data_offset; prefetch(p + L1_CACHE_BYTES * 0); prefetch(p + L1_CACHE_BYTES * 1); prefetch(p + L1_CACHE_BYTES * 2); } /* avoid atomic set bit if it's not needed: */ if (!btree_node_accessed(b)) set_btree_node_accessed(b); if (unlikely(btree_node_read_error(b))) { six_unlock_type(&b->c.lock, lock_type); return ERR_PTR(-BCH_ERR_btree_node_read_error); } EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); return b; } struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, const struct bkey_i *k, enum btree_id btree_id, unsigned level, bool nofill) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; int ret; 
EBUG_ON(level >= BTREE_MAX_DEPTH); if (c->opts.btree_node_mem_ptr_optimization) { b = btree_node_mem_ptr(k); if (b) goto lock_node; } retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { if (nofill) goto out; b = bch2_btree_node_fill(trans, NULL, k, btree_id, level, SIX_LOCK_read, true); /* We raced and found the btree node in the cache */ if (!b) goto retry; if (IS_ERR(b) && !bch2_btree_cache_cannibalize_lock(trans, NULL)) goto retry; if (IS_ERR(b)) goto out; } else { lock_node: ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ERR_PTR(ret); BUG_ON(ret); if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.btree_id != btree_id || b->c.level != level)) { six_unlock_read(&b->c.lock); goto retry; } } /* XXX: waiting on IO with btree locks held: */ __bch2_btree_node_wait_on_read(b); prefetch(b->aux_data); for_each_bset(b, t) { void *p = (u64 *) b->aux_data + t->aux_data_offset; prefetch(p + L1_CACHE_BYTES * 0); prefetch(p + L1_CACHE_BYTES * 1); prefetch(p + L1_CACHE_BYTES * 2); } /* avoid atomic set bit if it's not needed: */ if (!btree_node_accessed(b)) set_btree_node_accessed(b); if (unlikely(btree_node_read_error(b))) { six_unlock_read(&b->c.lock); b = ERR_PTR(-BCH_ERR_btree_node_read_error); goto out; } EBUG_ON(b->c.btree_id != btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); out: bch2_btree_cache_cannibalize_unlock(trans); return b; } int bch2_btree_node_prefetch(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; BUG_ON(path && !btree_node_locked(path, level + 1)); BUG_ON(level >= BTREE_MAX_DEPTH); struct btree *b = btree_cache_find(bc, k); if (b) return 0; b = bch2_btree_node_fill(trans, path, k, btree_id, level, SIX_LOCK_read, false); if (!IS_ERR_OR_NULL(b)) six_unlock_read(&b->c.lock); return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); } void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; b = btree_cache_find(bc, k); if (!b) return; BUG_ON(b == btree_node_root(trans->c, b)); wait_on_io: /* not allowed to wait on io with btree locks held: */ /* XXX we're called from btree_gc which will be holding other btree * nodes locked */ __bch2_btree_node_wait_on_read(b); __bch2_btree_node_wait_on_write(b); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); if (unlikely(b->hash_val != btree_ptr_hash_val(k))) goto out; if (btree_node_dirty(b)) { __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; } BUG_ON(btree_node_dirty(b)); mutex_lock(&bc->lock); btree_node_data_free(c, b); bch2_btree_node_hash_remove(bc, b); mutex_unlock(&bc->lock); out: six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } const char *bch2_btree_id_str(enum btree_id btree) { return btree < BTREE_ID_NR ? 
__bch2_btree_ids[btree] : "(unknown)"; } void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { prt_printf(out, "%s level %u/%u\n ", bch2_btree_id_str(b->c.btree_id), b->c.level, bch2_btree_id_root(c, b->c.btree_id)->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { struct bset_stats stats; memset(&stats, 0, sizeof(stats)); bch2_btree_keys_stats(b, &stats); prt_printf(out, "l %u ", b->c.level); bch2_bpos_to_text(out, b->data->min_key); prt_printf(out, " - "); bch2_bpos_to_text(out, b->data->max_key); prt_printf(out, ":\n" " ptrs: "); bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); prt_newline(out); prt_printf(out, " format: "); bch2_bkey_format_to_text(out, &b->format); prt_printf(out, " unpack fn len: %u\n" " bytes used %zu/%zu (%zu%% full)\n" " sib u64s: %u, %u (merge threshold %u)\n" " nr packed keys %u\n" " nr unpacked keys %u\n" " floats %zu\n" " failed unpacked %zu\n", b->unpack_fn_len, b->nr.live_u64s * sizeof(u64), btree_buf_bytes(b) - sizeof(struct btree_node), b->nr.live_u64s * 100 / btree_max_u64s(c), b->sib_u64s[0], b->sib_u64s[1], c->btree_foreground_merge_threshold, b->nr.packed_keys, b->nr.unpacked_keys, stats.floats, stats.failed); } void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c) { prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used); prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty)); prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock); }
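One detail worth calling out from the cache code above is the "open coded mutex" used for cannibalization: bch2_btree_cache_cannibalize_lock() cmpxchg()es the current task into bc->alloc_lock, treats old == current as a successful re-entry, and otherwise parks on a closure waitlist. The sketch below reproduces just that try-lock shape in userspace C11; the thread-local token stands in for the kernel's current pointer, and the waitlist/wakeup half is omitted.

/*
 * Illustrative userspace sketch (not part of the kernel source):
 * a re-entrant, open-coded try-lock built from one compare-and-swap.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

/* Each thread identifies itself by the address of a thread-local token,
 * standing in for the kernel's `current` task pointer. */
static _Thread_local char thread_token;
static _Atomic(void *) alloc_lock;	/* NULL when unowned */

static bool cannibalize_trylock(void)
{
	void *self = &thread_token;
	void *expected = NULL;

	if (atomic_compare_exchange_strong(&alloc_lock, &expected, self))
		return true;		/* lock was free: we now own it */
	return expected == self;	/* we already owned it: re-entry succeeds */
}

static void cannibalize_unlock(void)
{
	void *self = &thread_token;

	if (atomic_load(&alloc_lock) == self)
		atomic_store(&alloc_lock, NULL);	/* the real code also wakes waiters */
}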
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/if_arp.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_tables.h> struct nft_cmp_expr { struct nft_data data; u8 sreg; u8 len; enum nft_cmp_ops op:8; }; void nft_cmp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); int d; d = memcmp(&regs->data[priv->sreg], &priv->data, priv->len); switch (priv->op) { case NFT_CMP_EQ: if (d != 0) goto mismatch; break; case NFT_CMP_NEQ: if (d == 0) goto mismatch; break; case NFT_CMP_LT: if (d == 0) goto mismatch; fallthrough; case NFT_CMP_LTE: if (d > 0) goto mismatch; break; case NFT_CMP_GT: if (d == 0) goto mismatch; fallthrough; case NFT_CMP_GTE: if (d < 0) goto mismatch; break; } return; mismatch: regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { [NFTA_CMP_SREG] = { .type = NLA_U32 }, [NFTA_CMP_OP] = { .type = NLA_U32 }, [NFTA_CMP_DATA] = { .type = NLA_NESTED }, }; static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(priv->data), }; int err; err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if
(err < 0) return err; priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; } static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) goto nla_put_failure; if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, NFT_DATA_VALUE, priv->len) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } union nft_cmp_offload_data { u16 val16; u32 val32; u64 val64; }; static void nft_payload_n2h(union nft_cmp_offload_data *data, const u8 *val, u32 len) { switch (len) { case 2: data->val16 = ntohs(*((__be16 *)val)); break; case 4: data->val32 = ntohl(*((__be32 *)val)); break; case 8: data->val64 = be64_to_cpu(*((__be64 *)val)); break; default: WARN_ON_ONCE(1); break; } } static int __nft_cmp_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_cmp_expr *priv) { struct nft_offload_reg *reg = &ctx->regs[priv->sreg]; union nft_cmp_offload_data _data, _datamask; u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; u8 *data, *datamask; if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) { nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len); nft_payload_n2h(&_datamask, (u8 *)&reg->mask, reg->len); data = (u8 *)&_data; datamask = (u8 *)&_datamask; } else { data = (u8 *)&priv->data; datamask = (u8 *)&reg->mask; } memcpy(key + reg->offset, data, reg->len); memcpy(mask + reg->offset, datamask, reg->len); flow->match.dissector.used_keys |= BIT_ULL(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; if (reg->key == FLOW_DISSECTOR_KEY_META && reg->offset == offsetof(struct nft_flow_key, meta.ingress_iftype) && nft_reg_load16(priv->data.data) != ARPHRD_ETHER) return -EOPNOTSUPP; nft_offload_update_dependency(ctx, &priv->data, reg->len); return 0; } static int nft_cmp_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); return __nft_cmp_offload(ctx, flow, priv); } static const struct nft_expr_ops nft_cmp_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_offload, }; /* Calculate the mask for the nft_cmp_fast expression. On big endian the * mask needs to include the *upper* bytes when interpreting that data as * something smaller than the full u32, therefore a cpu_to_le32 is done. 
*/ static u32 nft_cmp_fast_mask(unsigned int len) { __le32 mask = cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr, data) * BITS_PER_BYTE - len)); return (__force u32)mask; } static int nft_cmp_fast_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_data data; struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(data), }; int err; err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; desc.len *= BITS_PER_BYTE; priv->mask = nft_cmp_fast_mask(desc.len); priv->data = data.data[0] & priv->mask; priv->len = desc.len; priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; return 0; } static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_cmp_expr cmp = { .data = { .data = { [0] = priv->data, }, }, .sreg = priv->sreg, .len = priv->len / BITS_PER_BYTE, .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ, }; return __nft_cmp_offload(ctx, flow, &cmp); } static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; struct nft_data data; if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) goto nla_put_failure; data.data[0] = priv->data; if (nft_data_dump(skb, NFTA_CMP_DATA, &data, NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } const struct nft_expr_ops nft_cmp_fast_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)), .eval = NULL, /* inlined */ .init = nft_cmp_fast_init, .dump = nft_cmp_fast_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_fast_offload, }; static u32 nft_cmp_mask(u32 bitlen) { return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen)); } static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen) { int len = bitlen / BITS_PER_BYTE; int i, words = len / sizeof(u32); for (i = 0; i < words; i++) { data->data[i] = 0xffffffff; bitlen -= sizeof(u32) * BITS_PER_BYTE; } if (len % sizeof(u32)) data->data[i++] = nft_cmp_mask(bitlen); for (; i < 4; i++) data->data[i] = 0; } static int nft_cmp16_fast_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(priv->data), }; int err; err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE); priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; priv->len = desc.len; return 0; } static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); struct nft_cmp_expr cmp = { .data = priv->data, .sreg = priv->sreg, .len = priv->len, .op = priv->inv ? 
NFT_CMP_NEQ : NFT_CMP_EQ, }; return __nft_cmp_offload(ctx, flow, &cmp); } static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) goto nla_put_failure; if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, NFT_DATA_VALUE, priv->len) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } const struct nft_expr_ops nft_cmp16_fast_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)), .eval = NULL, /* inlined */ .init = nft_cmp16_fast_init, .dump = nft_cmp16_fast_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp16_fast_offload, }; static const struct nft_expr_ops * nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data data; struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(data), }; enum nft_cmp_ops op; u8 sreg; int err; if (tb[NFTA_CMP_SREG] == NULL || tb[NFTA_CMP_OP] == NULL || tb[NFTA_CMP_DATA] == NULL) return ERR_PTR(-EINVAL); op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); switch (op) { case NFT_CMP_EQ: case NFT_CMP_NEQ: case NFT_CMP_LT: case NFT_CMP_LTE: case NFT_CMP_GT: case NFT_CMP_GTE: break; default: return ERR_PTR(-EINVAL); } err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return ERR_PTR(err); sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) { if (desc.len <= sizeof(u32)) return &nft_cmp_fast_ops; else if (desc.len <= sizeof(data) && ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) || (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0))) return &nft_cmp16_fast_ops; } return &nft_cmp_ops; } struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, };
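The nft_cmp_fast_mask() helper above keeps only the low 'len' bits of the 32-bit register word; the cpu_to_le32() in it exists so that the same low-order *bytes* are selected when a big-endian CPU looks at the register memory, as the comment before it explains. A standalone sketch, assuming a little-endian host where cpu_to_le32() is the identity, of the masks the fast path ends up using:

/* Userspace re-derivation of nft_cmp_fast_mask(): on a little-endian host
 * cpu_to_le32() is a no-op, so the mask is simply ~0U >> (32 - len). */
#include <stdio.h>
#include <stdint.h>

static uint32_t cmp_fast_mask(unsigned int len_bits)
{
	/* sizeof_field(struct nft_cmp_fast_expr, data) is 4 bytes -> 32 bits */
	return ~0U >> (32 - len_bits);
}

int main(void)
{
	unsigned int len;

	for (len = 8; len <= 24; len += 8)
		printf("len %2u bits -> mask 0x%08x\n", len, cmp_fast_mask(len));
	/* prints 0x000000ff, 0x0000ffff, 0x00ffffff */
	return 0;
}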
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
 * include/uapi/linux/tipc_config.h: Header for TIPC configuration interface
 *
 * Copyright (c) 2003-2006, Ericsson AB
 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
*/ #ifndef _LINUX_TIPC_CONFIG_H_ #define _LINUX_TIPC_CONFIG_H_ #include <linux/types.h> #include <linux/string.h> #include <linux/tipc.h> #include <asm/byteorder.h> /* * Configuration * * All configuration management messaging involves sending a request message * to the TIPC configuration service on a node, which sends a reply message * back. (In the future multi-message replies may be supported.) * * Both request and reply messages consist of a transport header and payload. * The transport header contains info about the desired operation; * the payload consists of zero or more type/length/value (TLV) items * which specify parameters or results for the operation. * * For many operations, the request and reply messages have a fixed number * of TLVs (usually zero or one); however, some reply messages may return * a variable number of TLVs. A failed request is denoted by the presence * of an "error string" TLV in the reply message instead of the TLV(s) the * reply should contain if the request succeeds. */ /* * Public commands: * May be issued by any process. * Accepted by own node, or by remote node only if remote management enabled. */ #define TIPC_CMD_NOOP 0x0000 /* tx none, rx none */ #define TIPC_CMD_GET_NODES 0x0001 /* tx net_addr, rx node_info(s) */ #define TIPC_CMD_GET_MEDIA_NAMES 0x0002 /* tx none, rx media_name(s) */ #define TIPC_CMD_GET_BEARER_NAMES 0x0003 /* tx none, rx bearer_name(s) */ #define TIPC_CMD_GET_LINKS 0x0004 /* tx net_addr, rx link_info(s) */ #define TIPC_CMD_SHOW_NAME_TABLE 0x0005 /* tx name_tbl_query, rx ultra_string */ #define TIPC_CMD_SHOW_PORTS 0x0006 /* tx none, rx ultra_string */ #define TIPC_CMD_SHOW_LINK_STATS 0x000B /* tx link_name, rx ultra_string */ #define TIPC_CMD_SHOW_STATS 0x000F /* tx unsigned, rx ultra_string */ /* * Protected commands: * May only be issued by "network administration capable" process. * Accepted by own node, or by remote node only if remote management enabled * and this node is zone manager. */ #define TIPC_CMD_GET_REMOTE_MNG 0x4003 /* tx none, rx unsigned */ #define TIPC_CMD_GET_MAX_PORTS 0x4004 /* tx none, rx unsigned */ #define TIPC_CMD_GET_MAX_PUBL 0x4005 /* obsoleted */ #define TIPC_CMD_GET_MAX_SUBSCR 0x4006 /* obsoleted */ #define TIPC_CMD_GET_MAX_ZONES 0x4007 /* obsoleted */ #define TIPC_CMD_GET_MAX_CLUSTERS 0x4008 /* obsoleted */ #define TIPC_CMD_GET_MAX_NODES 0x4009 /* obsoleted */ #define TIPC_CMD_GET_MAX_SLAVES 0x400A /* obsoleted */ #define TIPC_CMD_GET_NETID 0x400B /* tx none, rx unsigned */ #define TIPC_CMD_ENABLE_BEARER 0x4101 /* tx bearer_config, rx none */ #define TIPC_CMD_DISABLE_BEARER 0x4102 /* tx bearer_name, rx none */ #define TIPC_CMD_SET_LINK_TOL 0x4107 /* tx link_config, rx none */ #define TIPC_CMD_SET_LINK_PRI 0x4108 /* tx link_config, rx none */ #define TIPC_CMD_SET_LINK_WINDOW 0x4109 /* tx link_config, rx none */ #define TIPC_CMD_SET_LOG_SIZE 0x410A /* obsoleted */ #define TIPC_CMD_DUMP_LOG 0x410B /* obsoleted */ #define TIPC_CMD_RESET_LINK_STATS 0x410C /* tx link_name, rx none */ /* * Private commands: * May only be issued by "network administration capable" process. * Accepted by own node only; cannot be used on a remote node. 
*/ #define TIPC_CMD_SET_NODE_ADDR 0x8001 /* tx net_addr, rx none */ #define TIPC_CMD_SET_REMOTE_MNG 0x8003 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_PORTS 0x8004 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_PUBL 0x8005 /* obsoleted */ #define TIPC_CMD_SET_MAX_SUBSCR 0x8006 /* obsoleted */ #define TIPC_CMD_SET_MAX_ZONES 0x8007 /* obsoleted */ #define TIPC_CMD_SET_MAX_CLUSTERS 0x8008 /* obsoleted */ #define TIPC_CMD_SET_MAX_NODES 0x8009 /* obsoleted */ #define TIPC_CMD_SET_MAX_SLAVES 0x800A /* obsoleted */ #define TIPC_CMD_SET_NETID 0x800B /* tx unsigned, rx none */ /* * Reserved commands: * May not be issued by any process. * Used internally by TIPC. */ #define TIPC_CMD_NOT_NET_ADMIN 0xC001 /* tx none, rx none */ /* * TLV types defined for TIPC */ #define TIPC_TLV_NONE 0 /* no TLV present */ #define TIPC_TLV_VOID 1 /* empty TLV (0 data bytes)*/ #define TIPC_TLV_UNSIGNED 2 /* 32-bit integer */ #define TIPC_TLV_STRING 3 /* char[128] (max) */ #define TIPC_TLV_LARGE_STRING 4 /* char[2048] (max) */ #define TIPC_TLV_ULTRA_STRING 5 /* char[32768] (max) */ #define TIPC_TLV_ERROR_STRING 16 /* char[128] containing "error code" */ #define TIPC_TLV_NET_ADDR 17 /* 32-bit integer denoting <Z.C.N> */ #define TIPC_TLV_MEDIA_NAME 18 /* char[TIPC_MAX_MEDIA_NAME] */ #define TIPC_TLV_BEARER_NAME 19 /* char[TIPC_MAX_BEARER_NAME] */ #define TIPC_TLV_LINK_NAME 20 /* char[TIPC_MAX_LINK_NAME] */ #define TIPC_TLV_NODE_INFO 21 /* struct tipc_node_info */ #define TIPC_TLV_LINK_INFO 22 /* struct tipc_link_info */ #define TIPC_TLV_BEARER_CONFIG 23 /* struct tipc_bearer_config */ #define TIPC_TLV_LINK_CONFIG 24 /* struct tipc_link_config */ #define TIPC_TLV_NAME_TBL_QUERY 25 /* struct tipc_name_table_query */ #define TIPC_TLV_PORT_REF 26 /* 32-bit port reference */ /* * Link priority limits (min, default, max, media default) */ #define TIPC_MIN_LINK_PRI 0 #define TIPC_DEF_LINK_PRI 10 #define TIPC_MAX_LINK_PRI 31 #define TIPC_MEDIA_LINK_PRI (TIPC_MAX_LINK_PRI + 1) /* * Link tolerance limits (min, default, max), in ms */ #define TIPC_MIN_LINK_TOL 50 #define TIPC_DEF_LINK_TOL 1500 #define TIPC_MAX_LINK_TOL 30000 #if (TIPC_MIN_LINK_TOL < 16) #error "TIPC_MIN_LINK_TOL is too small (abort limit may be NaN)" #endif /* * Link window limits (min, default, max), in packets */ #define TIPC_MIN_LINK_WIN 16 #define TIPC_DEF_LINK_WIN 50 #define TIPC_MAX_LINK_WIN 8191 /* * Default MTU for UDP media */ #define TIPC_DEF_LINK_UDP_MTU 14000 struct tipc_node_info { __be32 addr; /* network address of node */ __be32 up; /* 0=down, 1= up */ }; struct tipc_link_info { __be32 dest; /* network address of peer node */ __be32 up; /* 0=down, 1=up */ char str[TIPC_MAX_LINK_NAME]; /* link name */ }; struct tipc_bearer_config { __be32 priority; /* Range [1,31]. Override per link */ __be32 disc_domain; /* <Z.C.N> describing desired nodes */ char name[TIPC_MAX_BEARER_NAME]; }; struct tipc_link_config { __be32 value; char name[TIPC_MAX_LINK_NAME]; }; #define TIPC_NTQ_ALLTYPES 0x80000000 struct tipc_name_table_query { __be32 depth; /* 1:type, 2:+name info, 3:+port info, 4+:+debug info */ __be32 type; /* {t,l,u} info ignored if high bit of "depth" is set */ __be32 lowbound; /* (i.e. displays all entries of name table) */ __be32 upbound; }; /* * The error string TLV is a null-terminated string describing the cause * of the request failure. To simplify error processing (and to save space) * the first character of the string can be a special error code character * (lying by the range 0x80 to 0xFF) which represents a pre-defined reason. 
*/ #define TIPC_CFG_TLV_ERROR "\x80" /* request contains incorrect TLV(s) */ #define TIPC_CFG_NOT_NET_ADMIN "\x81" /* must be network administrator */ #define TIPC_CFG_NOT_ZONE_MSTR "\x82" /* must be zone master */ #define TIPC_CFG_NO_REMOTE "\x83" /* remote management not enabled */ #define TIPC_CFG_NOT_SUPPORTED "\x84" /* request is not supported by TIPC */ #define TIPC_CFG_INVALID_VALUE "\x85" /* request has invalid argument value */ /* * A TLV consists of a descriptor, followed by the TLV value. * TLV descriptor fields are stored in network byte order; * TLV values must also be stored in network byte order (where applicable). * TLV descriptors must be aligned to addresses which are multiple of 4, * so up to 3 bytes of padding may exist at the end of the TLV value area. * There must not be any padding between the TLV descriptor and its value. */ struct tlv_desc { __be16 tlv_len; /* TLV length (descriptor + value) */ __be16 tlv_type; /* TLV identifier */ }; #define TLV_ALIGNTO 4 #define TLV_ALIGN(datalen) (((datalen)+(TLV_ALIGNTO-1)) & ~(TLV_ALIGNTO-1)) #define TLV_LENGTH(datalen) (sizeof(struct tlv_desc) + (datalen)) #define TLV_SPACE(datalen) (TLV_ALIGN(TLV_LENGTH(datalen))) #define TLV_DATA(tlv) ((void *)((char *)(tlv) + TLV_LENGTH(0))) static inline int TLV_OK(const void *tlv, __u16 space) { /* * Would also like to check that "tlv" is a multiple of 4, * but don't know how to do this in a portable way. * - Tried doing (!(tlv & (TLV_ALIGNTO-1))), but GCC compiler * won't allow binary "&" with a pointer. * - Tried casting "tlv" to integer type, but causes warning about size * mismatch when pointer is bigger than chosen type (int, long, ...). */ return (space >= TLV_SPACE(0)) && (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space); } static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) { return TLV_OK(tlv, space) && (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } static inline int TLV_GET_LEN(struct tlv_desc *tlv) { return __be16_to_cpu(tlv->tlv_len); } static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) { tlv->tlv_len = __cpu_to_be16(len); } static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { return (__be16_to_cpu(tlv->tlv_type) == type); } static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) { tlv->tlv_type = __cpu_to_be16(type); } static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; int tlv_len; tlv_len = TLV_LENGTH(len); tlv_ptr = (struct tlv_desc *)tlv; tlv_ptr->tlv_type = __cpu_to_be16(type); tlv_ptr->tlv_len = __cpu_to_be16(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); } return TLV_SPACE(len); } /* * A TLV list descriptor simplifies processing of messages * containing multiple TLVs. 
*/ struct tlv_list_desc { struct tlv_desc *tlv_ptr; /* ptr to current TLV */ __u32 tlv_space; /* # bytes from curr TLV to list end */ }; static inline void TLV_LIST_INIT(struct tlv_list_desc *list, void *data, __u32 space) { list->tlv_ptr = (struct tlv_desc *)data; list->tlv_space = space; } static inline int TLV_LIST_EMPTY(struct tlv_list_desc *list) { return (list->tlv_space == 0); } static inline int TLV_LIST_CHECK(struct tlv_list_desc *list, __u16 exp_type) { return TLV_CHECK(list->tlv_ptr, list->tlv_space, exp_type); } static inline void *TLV_LIST_DATA(struct tlv_list_desc *list) { return TLV_DATA(list->tlv_ptr); } static inline void TLV_LIST_STEP(struct tlv_list_desc *list) { __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len)); list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space); list->tlv_space -= tlv_space; } /* * Configuration messages exchanged via NETLINK_GENERIC use the following * family id, name, version and command. */ #define TIPC_GENL_NAME "TIPC" #define TIPC_GENL_VERSION 0x1 #define TIPC_GENL_CMD 0x1 /* * TIPC specific header used in NETLINK_GENERIC requests. */ struct tipc_genlmsghdr { __u32 dest; /* Destination address */ __u16 cmd; /* Command */ __u16 reserved; /* Unused */ }; #define TIPC_GENL_HDRLEN NLMSG_ALIGN(sizeof(struct tipc_genlmsghdr)) /* * Configuration messages exchanged via TIPC sockets use the TIPC configuration * message header, which is defined below. This structure is analogous * to the Netlink message header, but fields are stored in network byte order * and no padding is permitted between the header and the message data * that follows. */ struct tipc_cfg_msg_hdr { __be32 tcm_len; /* Message length (including header) */ __be16 tcm_type; /* Command type */ __be16 tcm_flags; /* Additional flags */ char tcm_reserved[8]; /* Unused */ }; #define TCM_F_REQUEST 0x1 /* Flag: Request message */ #define TCM_F_MORE 0x2 /* Flag: Message to be continued */ #define TCM_ALIGN(datalen) (((datalen)+3) & ~3) #define TCM_LENGTH(datalen) (sizeof(struct tipc_cfg_msg_hdr) + datalen) #define TCM_SPACE(datalen) (TCM_ALIGN(TCM_LENGTH(datalen))) #define TCM_DATA(tcm_hdr) ((void *)((char *)(tcm_hdr) + TCM_LENGTH(0))) static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, void *data, __u16 data_len) { struct tipc_cfg_msg_hdr *tcm_hdr; int msg_len; msg_len = TCM_LENGTH(data_len); tcm_hdr = (struct tipc_cfg_msg_hdr *)msg; tcm_hdr->tcm_len = __cpu_to_be32(msg_len); tcm_hdr->tcm_type = __cpu_to_be16(cmd); tcm_hdr->tcm_flags = __cpu_to_be16(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); } return TCM_SPACE(data_len); } #endif
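Because the TLV helpers in this header are static inlines in a UAPI header, user space can build and parse configuration payloads with them directly. A minimal sketch, assuming the kernel UAPI headers are installed so <linux/tipc_config.h> can be included from user space: pack a single TIPC_TLV_UNSIGNED value with TLV_SET() and sanity-check it with TLV_OK()/TLV_CHECK():

/* Userspace sketch: build one TIPC_TLV_UNSIGNED TLV and verify it. */
#include <stdio.h>
#include <linux/tipc_config.h>

int main(void)
{
	char buf[64];
	__be32 netid = __cpu_to_be32(4711);   /* TLV values are network byte order */
	int space;

	space = TLV_SET(buf, TIPC_TLV_UNSIGNED, &netid, sizeof(netid));

	printf("TLV space used: %d bytes\n", space);   /* 4-byte descriptor + 4-byte value */
	printf("TLV ok:         %d\n", TLV_OK(buf, sizeof(buf)));
	printf("TLV type match: %d\n", TLV_CHECK(buf, sizeof(buf), TIPC_TLV_UNSIGNED));
	printf("TLV length:     %d\n", TLV_GET_LEN((struct tlv_desc *)buf));
	return 0;
}

TLV_SET() returns TLV_SPACE(len), i.e. the 4-byte-aligned number of bytes consumed, so consecutive TLVs can be packed back to back in the same buffer.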
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_SWAB_H #define _UAPI_LINUX_SWAB_H #include <linux/types.h> #include <linux/stddef.h> #include <asm/bitsperlong.h> #include <asm/swab.h> /* * casts are necessary for constants, because we never know how for sure * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. */ #define ___constant_swab16(x) ((__u16)( \ (((__u16)(x) & (__u16)0x00ffU) << 8) | \ (((__u16)(x) & (__u16)0xff00U) >> 8))) #define ___constant_swab32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ (((__u32)(x) & (__u32)0xff000000UL) >> 24))) #define ___constant_swab64(x) ((__u64)( \ (((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ (((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ (((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ (((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \ (((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \ (((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ (((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \ (((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56))) #define ___constant_swahw32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \ (((__u32)(x) & (__u32)0xffff0000UL) >> 16))) #define ___constant_swahb32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \ (((__u32)(x) & (__u32)0xff00ff00UL) >> 8))) /* * Implement the following as inlines, but define the interface using * macros to allow constant folding when possible: * ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32 */ static inline __attribute_const__ __u16 __fswab16(__u16 val) { #if defined (__arch_swab16) return __arch_swab16(val); #else return ___constant_swab16(val); #endif } static inline __attribute_const__ __u32 __fswab32(__u32 val) { #if defined(__arch_swab32) return __arch_swab32(val); #else return ___constant_swab32(val); #endif } static inline __attribute_const__ __u64 __fswab64(__u64 val) { #if defined (__arch_swab64) return __arch_swab64(val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; __u32 l = val & ((1ULL << 32) - 1); return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h))); #else return ___constant_swab64(val); #endif } static inline __attribute_const__ __u32 __fswahw32(__u32 val) { #ifdef __arch_swahw32 return __arch_swahw32(val); #else return 
___constant_swahw32(val); #endif } static inline __attribute_const__ __u32 __fswahb32(__u32 val) { #ifdef __arch_swahb32 return __arch_swahb32(val); #else return ___constant_swahb32(val); #endif } /** * __swab16 - return a byteswapped 16-bit value * @x: value to byteswap */ #ifdef __HAVE_BUILTIN_BSWAP16__ #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) #else #define __swab16(x) \ (__u16)(__builtin_constant_p(x) ? \ ___constant_swab16(x) : \ __fswab16(x)) #endif /** * __swab32 - return a byteswapped 32-bit value * @x: value to byteswap */ #ifdef __HAVE_BUILTIN_BSWAP32__ #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x)) #else #define __swab32(x) \ (__u32)(__builtin_constant_p(x) ? \ ___constant_swab32(x) : \ __fswab32(x)) #endif /** * __swab64 - return a byteswapped 64-bit value * @x: value to byteswap */ #ifdef __HAVE_BUILTIN_BSWAP64__ #define __swab64(x) (__u64)__builtin_bswap64((__u64)(x)) #else #define __swab64(x) \ (__u64)(__builtin_constant_p(x) ? \ ___constant_swab64(x) : \ __fswab64(x)) #endif static __always_inline unsigned long __swab(const unsigned long y) { #if __BITS_PER_LONG == 64 return __swab64(y); #else /* __BITS_PER_LONG == 32 */ return __swab32(y); #endif } /** * __swahw32 - return a word-swapped 32-bit value * @x: value to wordswap * * __swahw32(0x12340000) is 0x00001234 */ #define __swahw32(x) \ (__builtin_constant_p((__u32)(x)) ? \ ___constant_swahw32(x) : \ __fswahw32(x)) /** * __swahb32 - return a high and low byte-swapped 32-bit value * @x: value to byteswap * * __swahb32(0x12345678) is 0x34127856 */ #define __swahb32(x) \ (__builtin_constant_p((__u32)(x)) ? \ ___constant_swahb32(x) : \ __fswahb32(x)) /** * __swab16p - return a byteswapped 16-bit value from a pointer * @p: pointer to a naturally-aligned 16-bit value */ static __always_inline __u16 __swab16p(const __u16 *p) { #ifdef __arch_swab16p return __arch_swab16p(p); #else return __swab16(*p); #endif } /** * __swab32p - return a byteswapped 32-bit value from a pointer * @p: pointer to a naturally-aligned 32-bit value */ static __always_inline __u32 __swab32p(const __u32 *p) { #ifdef __arch_swab32p return __arch_swab32p(p); #else return __swab32(*p); #endif } /** * __swab64p - return a byteswapped 64-bit value from a pointer * @p: pointer to a naturally-aligned 64-bit value */ static __always_inline __u64 __swab64p(const __u64 *p) { #ifdef __arch_swab64p return __arch_swab64p(p); #else return __swab64(*p); #endif } /** * __swahw32p - return a wordswapped 32-bit value from a pointer * @p: pointer to a naturally-aligned 32-bit value * * See __swahw32() for details of wordswapping. */ static inline __u32 __swahw32p(const __u32 *p) { #ifdef __arch_swahw32p return __arch_swahw32p(p); #else return __swahw32(*p); #endif } /** * __swahb32p - return a high and low byteswapped 32-bit value from a pointer * @p: pointer to a naturally-aligned 32-bit value * * See __swahb32() for details of high/low byteswapping. 
*/ static inline __u32 __swahb32p(const __u32 *p) { #ifdef __arch_swahb32p return __arch_swahb32p(p); #else return __swahb32(*p); #endif } /** * __swab16s - byteswap a 16-bit value in-place * @p: pointer to a naturally-aligned 16-bit value */ static inline void __swab16s(__u16 *p) { #ifdef __arch_swab16s __arch_swab16s(p); #else *p = __swab16p(p); #endif } /** * __swab32s - byteswap a 32-bit value in-place * @p: pointer to a naturally-aligned 32-bit value */ static __always_inline void __swab32s(__u32 *p) { #ifdef __arch_swab32s __arch_swab32s(p); #else *p = __swab32p(p); #endif } /** * __swab64s - byteswap a 64-bit value in-place * @p: pointer to a naturally-aligned 64-bit value */ static __always_inline void __swab64s(__u64 *p) { #ifdef __arch_swab64s __arch_swab64s(p); #else *p = __swab64p(p); #endif } /** * __swahw32s - wordswap a 32-bit value in-place * @p: pointer to a naturally-aligned 32-bit value * * See __swahw32() for details of wordswapping */ static inline void __swahw32s(__u32 *p) { #ifdef __arch_swahw32s __arch_swahw32s(p); #else *p = __swahw32p(p); #endif } /** * __swahb32s - high and low byteswap a 32-bit value in-place * @p: pointer to a naturally-aligned 32-bit value * * See __swahb32() for details of high and low byte swapping */ static inline void __swahb32s(__u32 *p) { #ifdef __arch_swahb32s __arch_swahb32s(p); #else *p = __swahb32p(p); #endif } #endif /* _UAPI_LINUX_SWAB_H */
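The kernel-doc above already states the expected results of the word-swap and byte-in-half-word-swap variants. The sketch below re-derives them from the same mask-and-shift expressions used by the ___constant_* macros, as a self-contained userspace check (it deliberately avoids including the kernel header):

/* Self-contained check of the documented swab semantics; the expressions
 * mirror ___constant_swab32 / ___constant_swahw32 / ___constant_swahb32. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t swab32(uint32_t x)   /* full byte swap */
{
	return ((x & 0x000000ffU) << 24) | ((x & 0x0000ff00U) << 8) |
	       ((x & 0x00ff0000U) >> 8)  | ((x & 0xff000000U) >> 24);
}

static uint32_t swahw32(uint32_t x)  /* swap the two 16-bit half-words */
{
	return ((x & 0x0000ffffU) << 16) | ((x & 0xffff0000U) >> 16);
}

static uint32_t swahb32(uint32_t x)  /* swap the bytes within each half-word */
{
	return ((x & 0x00ff00ffU) << 8) | ((x & 0xff00ff00U) >> 8);
}

int main(void)
{
	assert(swab32(0x12345678)  == 0x78563412);
	assert(swahw32(0x12340000) == 0x00001234);  /* example from the kernel-doc */
	assert(swahb32(0x12345678) == 0x34127856);  /* example from the kernel-doc */
	puts("swab semantics ok");
	return 0;
}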
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2011-2014 Autronica Fire and Security AS
 *
 * Author(s):
 *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
 *
 * This file contains device methods for creating, using and destroying
 * virtual HSR or PRP devices.
*/ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include "hsr_device.h" #include "hsr_slave.h" #include "hsr_framereg.h" #include "hsr_main.h" #include "hsr_forward.h" static bool is_admin_up(struct net_device *dev) { return dev && (dev->flags & IFF_UP); } static bool is_slave_up(struct net_device *dev) { return dev && is_admin_up(dev) && netif_oper_up(dev); } static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) { struct net_device *dev = master->dev; if (!is_admin_up(dev)) { netdev_set_operstate(dev, IF_OPER_DOWN); return; } if (has_carrier) netdev_set_operstate(dev, IF_OPER_UP); else netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); } static bool hsr_check_carrier(struct hsr_port *master) { struct hsr_port *port; ASSERT_RTNL(); hsr_for_each_port(master->hsr, port) { if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) { netif_carrier_on(master->dev); return true; } } netif_carrier_off(master->dev); return false; } static void hsr_check_announce(struct net_device *hsr_dev) { struct hsr_priv *hsr; hsr = netdev_priv(hsr_dev); if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) { /* Enable announce timer and start sending supervisory frames */ if (!timer_pending(&hsr->announce_timer)) { hsr->announce_count = 0; mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } } else { /* Deactivate the announce timer */ timer_delete(&hsr->announce_timer); } } void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { struct hsr_port *master; bool has_carrier; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) 
*/ has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); hsr_check_announce(master->dev); } int hsr_get_max_mtu(struct hsr_priv *hsr) { unsigned int mtu_max; struct hsr_port *port; mtu_max = ETH_DATA_LEN; hsr_for_each_port(hsr, port) if (port->type != HSR_PT_MASTER) mtu_max = min(port->dev->mtu, mtu_max); if (mtu_max < HSR_HLEN) return 0; return mtu_max - HSR_HLEN; } static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) { struct hsr_priv *hsr; hsr = netdev_priv(dev); if (new_mtu > hsr_get_max_mtu(hsr)) { netdev_info(dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", HSR_HLEN); return -EINVAL; } dev->mtu = new_mtu; return 0; } static int hsr_dev_open(struct net_device *dev) { struct hsr_priv *hsr; struct hsr_port *port; const char *designation = NULL; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: designation = "Slave A"; break; case HSR_PT_SLAVE_B: designation = "Slave B"; break; default: designation = "Unknown"; } if (!is_slave_up(port->dev)) netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n", designation, port->dev->name); } if (!designation) netdev_warn(dev, "No slave devices configured\n"); return 0; } static int hsr_dev_close(struct net_device *dev) { struct hsr_port *port; struct hsr_priv *hsr; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: dev_uc_unsync(port->dev, dev); dev_mc_unsync(port->dev, dev); break; default: break; } } return 0; } static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr, netdev_features_t features) { netdev_features_t mask; struct hsr_port *port; mask = features; /* Mask out all features that, if supported by one device, should be * enabled for all devices (see NETIF_F_ONE_FOR_ALL). * * Anything that's off in mask will not be enabled - so only things * that were in features originally, and also is in NETIF_F_ONE_FOR_ALL, * may become enabled. 
*/ features &= ~NETIF_F_ONE_FOR_ALL; hsr_for_each_port(hsr, port) features = netdev_increment_features(features, port->dev->features, mask); return features; } static netdev_features_t hsr_fix_features(struct net_device *dev, netdev_features_t features) { struct hsr_priv *hsr = netdev_priv(dev); return hsr_features_recompute(hsr, features); } static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); struct hsr_port *master; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (master) { skb->dev = master->dev; skb_reset_mac_header(skb); skb_reset_mac_len(skb); spin_lock_bh(&hsr->seqnr_lock); hsr_forward_skb(skb, master); spin_unlock_bh(&hsr->seqnr_lock); } else { dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); } return NETDEV_TX_OK; } static const struct header_ops hsr_header_ops = { .create = eth_header, .parse = eth_header_parse, }; static struct sk_buff *hsr_init_skb(struct hsr_port *master) { struct hsr_priv *hsr = master->hsr; struct sk_buff *skb; int hlen, tlen; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; /* skb size is same for PRP/HSR frames, only difference * being, for PRP it is a trailer and for HSR it is a * header */ skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload) + hlen + tlen); if (!skb) return skb; skb_reserve(skb, hlen); skb->dev = master->dev; skb->priority = TC_PRIO_CONTROL; if (dev_hard_header(skb, skb->dev, ETH_P_PRP, hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); skb_reset_mac_len(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); return skb; out: kfree_skb(skb); return NULL; } static void send_hsr_supervision_frame(struct hsr_port *master, unsigned long *interval) { struct hsr_priv *hsr = master->hsr; __u8 type = HSR_TLV_LIFE_CHECK; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); if (hsr->announce_count < 3 && hsr->prot_version == 0) { type = HSR_TLV_ANNOUNCE; *interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); hsr->announce_count++; } skb = hsr_init_skb(master); if (!skb) { netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_bh(&hsr->seqnr_lock); if (hsr->prot_version > 0) { hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; } else { hsr_stag->sequence_nr = htons(hsr->sequence_nr); hsr->sequence_nr++; } hsr_stag->tlv.HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? 
sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_bh(&hsr->seqnr_lock); return; } hsr_forward_skb(skb, master); spin_unlock_bh(&hsr->seqnr_lock); return; } static void send_prp_supervision_frame(struct hsr_port *master, unsigned long *interval) { struct hsr_priv *hsr = master->hsr; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; skb = hsr_init_skb(master); if (!skb) { netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0)); /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_bh(&hsr->seqnr_lock); hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload); /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_bh(&hsr->seqnr_lock); return; } hsr_forward_skb(skb, master); spin_unlock_bh(&hsr->seqnr_lock); } /* Announce (supervision frame) timer function */ static void hsr_announce(struct timer_list *t) { struct hsr_priv *hsr; struct hsr_port *master; unsigned long interval; hsr = from_timer(hsr, t, announce_timer); rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); hsr->proto_ops->send_sv_frame(master, &interval); if (is_admin_up(master->dev)) mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } void hsr_del_ports(struct hsr_priv *hsr) { struct hsr_port *port; port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (port) hsr_del_port(port); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); if (port) hsr_del_port(port); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port) hsr_del_port(port); } static void hsr_set_rx_mode(struct net_device *dev) { struct hsr_port *port; struct hsr_priv *hsr; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: dev_mc_sync_multiple(port->dev, dev); dev_uc_sync_multiple(port->dev, dev); break; default: break; } } } static void hsr_change_rx_flags(struct net_device *dev, int change) { struct hsr_port *port; struct hsr_priv *hsr; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: if (change & IFF_ALLMULTI) dev_set_allmulti(port->dev, dev->flags & IFF_ALLMULTI ? 
1 : -1); break; default: break; } } } static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, .ndo_change_rx_flags = hsr_change_rx_flags, .ndo_fix_features = hsr_fix_features, .ndo_set_rx_mode = hsr_set_rx_mode, }; static const struct device_type hsr_type = { .name = "hsr", }; static struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, .create_tagged_frame = hsr_create_tagged_frame, .get_untagged_frame = hsr_get_untagged_frame, .drop_frame = hsr_drop_frame, .fill_frame_info = hsr_fill_frame_info, .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame, }; static struct hsr_proto_ops prp_ops = { .send_sv_frame = send_prp_supervision_frame, .create_tagged_frame = prp_create_tagged_frame, .get_untagged_frame = prp_get_untagged_frame, .drop_frame = prp_drop_frame, .fill_frame_info = prp_fill_frame_info, .handle_san_frame = prp_handle_san_frame, .update_san_info = prp_update_san_info, }; void hsr_dev_setup(struct net_device *dev) { eth_hw_addr_random(dev); ether_setup(dev); dev->min_mtu = 0; dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; dev->needs_free_netdev = true; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX; dev->features = dev->hw_features; /* Prevent recursive tx locking */ dev->features |= NETIF_F_LLTX; /* VLAN on top of HSR needs testing and probably some work on * hsr_header_create() etc. */ dev->features |= NETIF_F_VLAN_CHALLENGED; /* Not sure about this. Taken from bridge code. netdev_features.h says * it means "Does not change network namespaces". */ dev->features |= NETIF_F_NETNS_LOCAL; } /* Return true if dev is a HSR master; return false otherwise. 
*/ bool is_hsr_master(struct net_device *dev) { return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); } EXPORT_SYMBOL(is_hsr_master); /* Default multicast address for HSR Supervision frames */ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, struct netlink_ext_ack *extack) { bool unregister = false; struct hsr_priv *hsr; int res; hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); INIT_LIST_HEAD(&hsr->node_db); spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); /* initialize protocol specific functions */ if (protocol_version == PRP_V1) { /* For PRP, lan_id has most significant 3 bits holding * the net_id of PRP_LAN_ID */ hsr->net_id = PRP_LAN_ID << 1; hsr->proto_ops = &prp_ops; } else { hsr->proto_ops = &hsr_ops; } /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); if (res < 0) return res; spin_lock_init(&hsr->seqnr_lock); /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; timer_setup(&hsr->announce_timer, hsr_announce, 0); timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; hsr->prot_version = protocol_version; /* Make sure the 1st call to netif_carrier_on() gets through */ netif_carrier_off(hsr_dev); res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER, extack); if (res) goto err_add_master; /* HSR forwarding offload supported in lower device? */ if ((slave[0]->features & NETIF_F_HW_HSR_FWD) && (slave[1]->features & NETIF_F_HW_HSR_FWD)) hsr->fwd_offloaded = true; res = register_netdevice(hsr_dev); if (res) goto err_unregister; unregister = true; res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); if (res) goto err_unregister; res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); if (res) goto err_unregister; hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); return 0; err_unregister: hsr_del_ports(hsr); err_add_master: hsr_del_self_node(hsr); if (unregister) unregister_netdevice(hsr_dev); return res; }
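hsr_get_max_mtu() caps the HSR/PRP master's MTU at the smallest slave MTU minus HSR_HLEN (the HSR tag length, 6 octets), which is what the netdev_info() message in hsr_dev_change_mtu() tells the user. A standalone sketch of that calculation with made-up slave MTUs (such a master is typically created with something like iproute2's "ip link add name hsr0 type hsr slave1 eth0 slave2 eth1"):

/* Userspace sketch of hsr_get_max_mtu(): master MTU = min(slave MTUs) - HSR_HLEN. */
#include <stdio.h>

#define ETH_DATA_LEN 1500
#define HSR_HLEN     6      /* HSR tag length, as defined in hsr_main.h */

static unsigned int hsr_max_mtu(const unsigned int *slave_mtu, int n_slaves)
{
	unsigned int mtu_max = ETH_DATA_LEN;
	int i;

	for (i = 0; i < n_slaves; i++)
		if (slave_mtu[i] < mtu_max)
			mtu_max = slave_mtu[i];

	return mtu_max < HSR_HLEN ? 0 : mtu_max - HSR_HLEN;
}

int main(void)
{
	unsigned int mtus[2] = { 1500, 1492 };   /* made-up: one slave behind PPPoE */

	printf("max hsr0 MTU: %u\n", hsr_max_mtu(mtus, 2));   /* prints 1486 */
	return 0;
}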
716 690 709 13 684 10 697 5 712 708 711 148 711 711 711 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* SPDX-License-Identifier: GPL-2.0-only */ /* * sha1_base.h - core logic for SHA-1 implementations * * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> */ #ifndef _CRYPTO_SHA1_BASE_H #define _CRYPTO_SHA1_BASE_H #include <crypto/internal/hash.h> #include <crypto/sha1.h> #include <linux/crypto.h> #include <linux/module.h> #include <linux/string.h> #include <asm/unaligned.h> typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks); static inline int sha1_base_init(struct shash_desc *desc) { struct sha1_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA1_H0; sctx->state[1] = SHA1_H1; sctx->state[2] = SHA1_H2; sctx->state[3] = SHA1_H3; sctx->state[4] = SHA1_H4; sctx->count = 0; return 0; } static inline int sha1_base_do_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha1_block_fn *block_fn) { struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; sctx->count += len; if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) { int blocks; if (partial) { int p = SHA1_BLOCK_SIZE - partial; memcpy(sctx->buffer + partial, data, p); data += p; len -= p; block_fn(sctx, sctx->buffer, 1); } blocks = len / SHA1_BLOCK_SIZE; len %= SHA1_BLOCK_SIZE; if (blocks) { block_fn(sctx, data, blocks); data += blocks * SHA1_BLOCK_SIZE; } partial = 0; } if (len) memcpy(sctx->buffer + partial, data, len); return 0; } static inline int sha1_base_do_finalize(struct shash_desc *desc, sha1_block_fn *block_fn) { const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); struct sha1_state *sctx = shash_desc_ctx(desc); __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; sctx->buffer[partial++] = 0x80; if (partial > bit_offset) { memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); partial = 0; block_fn(sctx, sctx->buffer, 1); } memset(sctx->buffer + partial, 0x0, bit_offset - partial); *bits = cpu_to_be64(sctx->count << 3); block_fn(sctx, sctx->buffer, 1); return 0; } static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) { struct sha1_state *sctx = shash_desc_ctx(desc); __be32 *digest = (__be32 *)out; int i; for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); memzero_explicit(sctx, sizeof(*sctx)); return 0; } #endif /* _CRYPTO_SHA1_BASE_H */
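sha1_base.h is consumed by SHA-1 drivers that only supply a block-compression function and forward their shash callbacks to these helpers. A hedged sketch of that wiring (the sha1_myarch_* names are hypothetical; crypto/sha1_generic.c follows the same pattern with its own block function):

/* Kernel-side sketch, assuming the includes pulled in by sha1_base.h. */
#include <crypto/sha1_base.h>

static void sha1_myarch_block_fn(struct sha1_state *sst, u8 const *src, int blocks)
{
	/* arch-specific code: compress 'blocks' 64-byte blocks from 'src' into sst->state */
}

static int sha1_myarch_update(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	return sha1_base_do_update(desc, data, len, sha1_myarch_block_fn);
}

static int sha1_myarch_final(struct shash_desc *desc, u8 *out)
{
	sha1_base_do_finalize(desc, sha1_myarch_block_fn);
	return sha1_base_finish(desc, out);
}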
/*
 * linux/fs/hfs/catalog.c
 *
 * Copyright (C) 1995-1997 Paul H. Hargrove
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 * This file may be distributed under the terms of the GNU General Public License.
 *
 * This file contains the functions related to the catalog B-tree.
 *
 * Cache code shamelessly stolen from
 *     linux/fs/inode.c Copyright (C) 1991, 1992 Linus Torvalds
 *     re-shamelessly stolen Copyright (C) 1997 Linus Torvalds
 */

#include "hfs_fs.h"
#include "btree.h"

/*
 * hfs_cat_build_key()
 *
 * Given the ID of the parent and the name build a search key.
*/ void hfs_cat_build_key(struct super_block *sb, btree_key *key, u32 parent, const struct qstr *name) { key->cat.reserved = 0; key->cat.ParID = cpu_to_be32(parent); if (name) { hfs_asc2mac(sb, &key->cat.CName, name); key->key_len = 6 + key->cat.CName.len; } else { memset(&key->cat.CName, 0, sizeof(struct hfs_name)); key->key_len = 6; } } static int hfs_cat_build_record(hfs_cat_rec *rec, u32 cnid, struct inode *inode) { __be32 mtime = hfs_mtime(); memset(rec, 0, sizeof(*rec)); if (S_ISDIR(inode->i_mode)) { rec->type = HFS_CDR_DIR; rec->dir.DirID = cpu_to_be32(cnid); rec->dir.CrDat = mtime; rec->dir.MdDat = mtime; rec->dir.BkDat = 0; rec->dir.UsrInfo.frView = cpu_to_be16(0xff); return sizeof(struct hfs_cat_dir); } else { /* init some fields for the file record */ rec->type = HFS_CDR_FIL; rec->file.Flags = HFS_FIL_USED | HFS_FIL_THD; if (!(inode->i_mode & S_IWUSR)) rec->file.Flags |= HFS_FIL_LOCK; rec->file.FlNum = cpu_to_be32(cnid); rec->file.CrDat = mtime; rec->file.MdDat = mtime; rec->file.BkDat = 0; rec->file.UsrWds.fdType = HFS_SB(inode->i_sb)->s_type; rec->file.UsrWds.fdCreator = HFS_SB(inode->i_sb)->s_creator; return sizeof(struct hfs_cat_file); } } static int hfs_cat_build_thread(struct super_block *sb, hfs_cat_rec *rec, int type, u32 parentid, const struct qstr *name) { rec->type = type; memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved)); rec->thread.ParID = cpu_to_be32(parentid); hfs_asc2mac(sb, &rec->thread.CName, name); return sizeof(struct hfs_cat_thread); } /* * create_entry() * * Add a new file or directory to the catalog B-tree and * return a (struct hfs_cat_entry) for it in '*result'. */ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct inode *inode) { struct hfs_find_data fd; struct super_block *sb; union hfs_cat_rec entry; int entry_size; int err; hfs_dbg(CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); if (dir->i_size >= HFS_MAX_VALENCE) return -ENOSPC; sb = dir->i_sb; err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); if (err) return err; /* * Fail early and avoid ENOSPC during the btree operations. We may * have to split the root node at most once. */ err = hfs_bmap_reserve(fd.tree, 2 * fd.tree->depth); if (err) goto err2; hfs_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? HFS_CDR_THD : HFS_CDR_FTH, dir->i_ino, str); err = hfs_brec_find(&fd); if (err != -ENOENT) { if (!err) err = -EEXIST; goto err2; } err = hfs_brec_insert(&fd, &entry, entry_size); if (err) goto err2; hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); entry_size = hfs_cat_build_record(&entry, cnid, inode); err = hfs_brec_find(&fd); if (err != -ENOENT) { /* panic? */ if (!err) err = -EEXIST; goto err1; } err = hfs_brec_insert(&fd, &entry, entry_size); if (err) goto err1; dir->i_size++; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); mark_inode_dirty(dir); hfs_find_exit(&fd); return 0; err1: hfs_cat_build_key(sb, fd.search_key, cnid, NULL); if (!hfs_brec_find(&fd)) hfs_brec_remove(&fd); err2: hfs_find_exit(&fd); return err; } /* * hfs_cat_compare() * * Description: * This is the comparison function used for the catalog B-tree. In * comparing catalog B-tree entries, the parent id is the most * significant field (compared as unsigned ints). 
The name field is * the least significant (compared in "Macintosh lexical order", * see hfs_strcmp() in string.c) * Input Variable(s): * struct hfs_cat_key *key1: pointer to the first key to compare * struct hfs_cat_key *key2: pointer to the second key to compare * Output Variable(s): * NONE * Returns: * int: negative if key1<key2, positive if key1>key2, and 0 if key1==key2 * Preconditions: * key1 and key2 point to "valid" (struct hfs_cat_key)s. * Postconditions: * This function has no side-effects */ int hfs_cat_keycmp(const btree_key *key1, const btree_key *key2) { __be32 k1p, k2p; k1p = key1->cat.ParID; k2p = key2->cat.ParID; if (k1p != k2p) return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1; return hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len, key2->cat.CName.name, key2->cat.CName.len); } /* Try to get a catalog entry for given catalog id */ // move to read_super??? int hfs_cat_find_brec(struct super_block *sb, u32 cnid, struct hfs_find_data *fd) { hfs_cat_rec rec; int res, len, type; hfs_cat_build_key(sb, fd->search_key, cnid, NULL); res = hfs_brec_read(fd, &rec, sizeof(rec)); if (res) return res; type = rec.type; if (type != HFS_CDR_THD && type != HFS_CDR_FTH) { pr_err("found bad thread record in catalog\n"); return -EIO; } fd->search_key->cat.ParID = rec.thread.ParID; len = fd->search_key->cat.CName.len = rec.thread.CName.len; if (len > HFS_NAMELEN) { pr_err("bad catalog namelength\n"); return -EIO; } memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); return hfs_brec_find(fd); } /* * hfs_cat_delete() * * Delete the indicated file or directory. * The associated thread is also removed unless ('with_thread'==0). */ int hfs_cat_delete(u32 cnid, struct inode *dir, const struct qstr *str) { struct super_block *sb; struct hfs_find_data fd; struct hfs_readdir_data *rd; int res, type; hfs_dbg(CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); sb = dir->i_sb; res = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); if (res) return res; hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); res = hfs_brec_find(&fd); if (res) goto out; type = hfs_bnode_read_u8(fd.bnode, fd.entryoffset); if (type == HFS_CDR_FIL) { struct hfs_cat_file file; hfs_bnode_read(fd.bnode, &file, fd.entryoffset, sizeof(file)); if (be32_to_cpu(file.FlNum) == cnid) { #if 0 hfs_free_fork(sb, &file, HFS_FK_DATA); #endif hfs_free_fork(sb, &file, HFS_FK_RSRC); } } /* we only need to take spinlock for exclusion with ->release() */ spin_lock(&HFS_I(dir)->open_dir_lock); list_for_each_entry(rd, &HFS_I(dir)->open_dir_list, list) { if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) rd->file->f_pos--; } spin_unlock(&HFS_I(dir)->open_dir_lock); res = hfs_brec_remove(&fd); if (res) goto out; hfs_cat_build_key(sb, fd.search_key, cnid, NULL); res = hfs_brec_find(&fd); if (!res) { res = hfs_brec_remove(&fd); if (res) goto out; } dir->i_size--; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); mark_inode_dirty(dir); res = 0; out: hfs_find_exit(&fd); return res; } /* * hfs_cat_move() * * Rename a file or directory, possibly to a new directory. * If the destination exists it is removed and a * (struct hfs_cat_entry) for it is returned in '*result'. 
*/ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name, struct inode *dst_dir, const struct qstr *dst_name) { struct super_block *sb; struct hfs_find_data src_fd, dst_fd; union hfs_cat_rec entry; int entry_size, type; int err; hfs_dbg(CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); sb = src_dir->i_sb; err = hfs_find_init(HFS_SB(sb)->cat_tree, &src_fd); if (err) return err; dst_fd = src_fd; /* * Fail early and avoid ENOSPC during the btree operations. We may * have to split the root node at most once. */ err = hfs_bmap_reserve(src_fd.tree, 2 * src_fd.tree->depth); if (err) goto out; /* find the old dir entry and read the data */ hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); err = hfs_brec_find(&src_fd); if (err) goto out; if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { err = -EIO; goto out; } hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); /* create new dir entry with the data from the old entry */ hfs_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); err = hfs_brec_find(&dst_fd); if (err != -ENOENT) { if (!err) err = -EEXIST; goto out; } err = hfs_brec_insert(&dst_fd, &entry, src_fd.entrylength); if (err) goto out; dst_dir->i_size++; inode_set_mtime_to_ts(dst_dir, inode_set_ctime_current(dst_dir)); mark_inode_dirty(dst_dir); /* finally remove the old entry */ hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); err = hfs_brec_find(&src_fd); if (err) goto out; err = hfs_brec_remove(&src_fd); if (err) goto out; src_dir->i_size--; inode_set_mtime_to_ts(src_dir, inode_set_ctime_current(src_dir)); mark_inode_dirty(src_dir); type = entry.type; if (type == HFS_CDR_FIL && !(entry.file.Flags & HFS_FIL_THD)) goto out; /* remove old thread entry */ hfs_cat_build_key(sb, src_fd.search_key, cnid, NULL); err = hfs_brec_find(&src_fd); if (err) goto out; err = hfs_brec_remove(&src_fd); if (err) goto out; /* create new thread entry */ hfs_cat_build_key(sb, dst_fd.search_key, cnid, NULL); entry_size = hfs_cat_build_thread(sb, &entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, dst_dir->i_ino, dst_name); err = hfs_brec_find(&dst_fd); if (err != -ENOENT) { if (!err) err = -EEXIST; goto out; } err = hfs_brec_insert(&dst_fd, &entry, entry_size); out: hfs_bnode_put(dst_fd.bnode); hfs_find_exit(&src_fd); return err; }
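The catalog key built by hfs_cat_build_key() above is compact: one reserved byte, a big-endian parent CNID and a Pascal-style name, with key_len covering the 6 fixed bytes plus the name length. The following standalone sketch (userspace C, not kernel code; the struct layout and the 31-byte name cap are assumptions for illustration only) mimics that construction so the length arithmetic is easy to verify.

/*
 * Minimal illustration of the catalog search-key layout, assuming a
 * simplified stand-in for the kernel's btree_key. Hypothetical names.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl()/ntohl() as stand-ins for cpu_to_be32() */

#define DEMO_NAMELEN 31	/* assumed classic-HFS name limit */

struct demo_cat_key {
	uint8_t  key_len;	/* key length, excluding this byte */
	uint8_t  reserved;
	uint32_t par_id;	/* parent CNID, big-endian on disk */
	uint8_t  name_len;	/* Pascal-style name */
	char     name[DEMO_NAMELEN];
};

static void demo_build_key(struct demo_cat_key *key, uint32_t parent,
			   const char *name)
{
	memset(key, 0, sizeof(*key));
	key->par_id = htonl(parent);
	if (name) {
		size_t len = strlen(name);

		if (len > DEMO_NAMELEN)
			len = DEMO_NAMELEN;
		key->name_len = (uint8_t)len;
		memcpy(key->name, name, len);
		/* 6 = reserved(1) + parent id(4) + name length byte(1) */
		key->key_len = 6 + key->name_len;
	} else {
		key->key_len = 6;	/* thread lookups use an empty name */
	}
}

int main(void)
{
	struct demo_cat_key key;

	demo_build_key(&key, 2, "README");
	printf("key_len=%u parent=%u name=%.*s\n",
	       (unsigned)key.key_len, (unsigned)ntohl(key.par_id),
	       (int)key.name_len, key.name);
	return 0;
}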
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> * Martin Josefsson <gandalf@wlug.westbo.se> */ /* Kernel module implementing an IP set type: the bitmap:ip,mac type */ #include <linux/module.h> #include <linux/ip.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/if_ether.h> #include <linux/netlink.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <net/netlink.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Counter support added */ /* 2 Comment support added */ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac #define HOST_MASK 32 #define IP_SET_BITMAP_STORED_TIMEOUT enum { MAC_UNSET, /* element is set, without MAC */ MAC_FILLED, /* element is set with MAC */ }; /* Type structure */ struct bitmap_ipmac { unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[] /* MAC + data extensions */ __aligned(__alignof__(u64)); }; /* ADT structure for generic function args */ struct bitmap_ipmac_adt_elem { unsigned char ether[ETH_ALEN] __aligned(2); u16 id; u16 add_mac; }; struct bitmap_ipmac_elem { unsigned char ether[ETH_ALEN]; unsigned char filled; } __aligned(__alignof__(u64)); static u32 ip_to_id(const struct
bitmap_ipmac *m, u32 ip) { return ip - m->first_ip; } #define get_elem(extensions, id, dsize) \ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize)) #define get_const_elem(extensions, id, dsize) \ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize)) /* Common functions */ static int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; elem = get_const_elem(map->extensions, e->id, dsize); if (e->add_mac && elem->filled == MAC_FILLED) return ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; } static int bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; elem = get_const_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } static int bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) { return elem->filled == MAC_FILLED; } static int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); } else { /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, * possibly by the kernel */ if (e->add_mac) ip_set_timeout_set(timeout, t); else *timeout = t; } return 0; } static int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; elem = get_elem(map->extensions, e->id, dsize); if (test_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->add_mac && (flags & IPSET_FLAG_EXIST) && !ether_addr_equal(e->ether, elem->ether)) { /* memcpy isn't atomic */ clear_bit(e->id, map->members); smp_mb__after_atomic(); ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; } else if (!e->add_mac) /* Already added without ethernet address */ return IPSET_ADD_FAILED; /* Fill the MAC address and trigger the timer activation */ clear_bit(e->id, map->members); smp_mb__after_atomic(); ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; } else if (e->add_mac) { /* We can store MAC too */ ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return 0; } elem->filled = MAC_UNSET; /* MAC is not stored yet, don't start timer */ return IPSET_ADD_STORE_PLAIN_TIMEOUT; } static int bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, struct bitmap_ipmac *map) { return !test_and_clear_bit(e->id, map->members); } static int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = get_const_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || (elem->filled == MAC_FILLED && nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether)); } static int bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, 
htonl(map->last_ip)); } static int bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; /* Backward compatibility: we don't check the second flag */ if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; e.id = ip_to_id(map, ip); if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); if (is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; e.id = ip_to_id(map, ip); if (tb[IPSET_ATTR_ETHER]) { if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN) return -IPSET_ERR_PROTOCOL; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); e.add_mac = 1; } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } static bool bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) { const struct bitmap_ipmac *x = a->data; const struct bitmap_ipmac *y = b->data; return x->first_ip == y->first_ip && x->last_ip == y->last_ip && a->timeout == b->timeout && a->extensions == b->extensions; } /* Plain variant */ #include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; set->timeout = IPSET_NO_TIMEOUT; map->set = set; set->data = map; set->family = NFPROTO_IPV4; return true; } static int bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); if (ret) return ret; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); if (ret) return ret; if (first_ip > last_ip) swap(first_ip, last_ip); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); } else { return -IPSET_ERR_PROTOCOL; } elements = (u64)last_ip - first_ip + 1; if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; set->dsize = ip_set_elem_len(set, tb, sizeof(struct bitmap_ipmac_elem), __alignof__(struct bitmap_ipmac_elem)); map = ip_set_alloc(sizeof(*map) + elements * set->dsize); if (!map) return -ENOMEM; map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } return 0; } static struct ip_set_type bitmap_ipmac_type = { .name = "bitmap:ip,mac", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init bitmap_ipmac_init(void) { return ip_set_type_register(&bitmap_ipmac_type); } static void __exit bitmap_ipmac_fini(void) { rcu_barrier(); ip_set_type_unregister(&bitmap_ipmac_type); } 
module_init(bitmap_ipmac_init); module_exit(bitmap_ipmac_fini);
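A quick way to see the data layout the module's fast path relies on: an address inside [first_ip, last_ip] maps to slot id = ip - first_ip, membership is a single bit, and the MAC plus its filled/unset state lives in a per-slot element. The standalone sketch below (userspace C; the names, the fixed 256-slot range and the inline bitmap are illustrative assumptions, not the kernel's layout) walks through an add with a MAC.

/*
 * Minimal sketch of the bitmap:ip,mac addressing scheme, under the
 * assumptions stated above. Hypothetical names throughout.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_elem {
	uint8_t ether[6];
	bool    filled;		/* MAC_FILLED vs MAC_UNSET in the module */
};

struct demo_bitmap_ipmac {
	uint32_t first_ip, last_ip;	/* host byte order, inclusive */
	uint64_t members[4];		/* 256 membership bits for this demo */
	struct demo_elem elem[256];
};

static int demo_add(struct demo_bitmap_ipmac *m, uint32_t ip,
		    const uint8_t *mac)
{
	uint32_t id;

	if (ip < m->first_ip || ip > m->last_ip)
		return -1;			/* outside the set's range */
	id = ip - m->first_ip;
	m->members[id / 64] |= 1ULL << (id % 64);
	if (mac) {
		memcpy(m->elem[id].ether, mac, 6);
		/* the module starts the stored timeout once the MAC is known */
		m->elem[id].filled = true;
	}
	return 0;
}

int main(void)
{
	struct demo_bitmap_ipmac m = {
		.first_ip = 0xc0a80100,		/* 192.168.1.0 */
		.last_ip  = 0xc0a801ff,		/* 192.168.1.255 */
	};
	const uint8_t mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	demo_add(&m, 0xc0a80105, mac);		/* 192.168.1.5 with a MAC */
	printf("slot 5 set: %d\n", !!(m.members[0] & (1ULL << 5)));
	return 0;
}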
// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Antonio Quartulli */ #include "bat_v_ogm.h" #include "main.h" #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> #include <uapi/linux/batadv_packet.h> #include "bat_algo.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "originator.h" #include "routing.h" #include "send.h" #include "translation-table.h" #include "tvlv.h" /** * batadv_v_ogm_orig_get() - retrieve and possibly create an originator node * @bat_priv: the bat priv with all the soft interface information * @addr: the address of the originator * * Return: the orig_node corresponding to the specified address. If such an * object does not exist, it is allocated here. In case of allocation failure * returns NULL.
*/ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; int hash_added; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) return orig_node; orig_node = batadv_orig_node_new(bat_priv, addr); if (!orig_node) return NULL; kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) { /* remove refcnt for newly created orig_node and hash entry */ batadv_orig_node_put(orig_node); batadv_orig_node_put(orig_node); orig_node = NULL; } return orig_node; } /** * batadv_v_ogm_start_queue_timer() - restart the OGM aggregation timer * @hard_iface: the interface to use to send the OGM */ static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface) { unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000; /* msecs * [0.9, 1.1] */ msecs += get_random_u32_below(msecs / 5) - (msecs / 10); queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq, msecs_to_jiffies(msecs / 1000)); } /** * batadv_v_ogm_start_timer() - restart the OGM sending timer * @bat_priv: the bat priv with all the soft interface information */ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) { unsigned long msecs; /* this function may be invoked in different contexts (ogm rescheduling * or hard_iface activation), but the work timer should not be reset */ if (delayed_work_pending(&bat_priv->bat_v.ogm_wq)) return; msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER; msecs += get_random_u32_below(2 * BATADV_JITTER); queue_delayed_work(batadv_event_workqueue, &bat_priv->bat_v.ogm_wq, msecs_to_jiffies(msecs)); } /** * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface * @skb: the OGM to send * @hard_iface: the interface to use to send the OGM */ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) { kfree_skb(skb); return; } batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, skb->len + ETH_HLEN); batadv_send_broadcast_skb(skb, hard_iface); } /** * batadv_v_ogm_len() - OGMv2 packet length * @skb: the OGM to check * * Return: Length of the given OGMv2 packet, including tvlv length, excluding * ethernet header length. */ static unsigned int batadv_v_ogm_len(struct sk_buff *skb) { struct batadv_ogm2_packet *ogm_packet; ogm_packet = (struct batadv_ogm2_packet *)skb->data; return BATADV_OGM2_HLEN + ntohs(ogm_packet->tvlv_len); } /** * batadv_v_ogm_queue_left() - check if given OGM still fits aggregation queue * @skb: the OGM to check * @hard_iface: the interface to use to send the OGM * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. * * Return: True, if the given OGMv2 packet still fits, false otherwise. 
*/ static bool batadv_v_ogm_queue_left(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { unsigned int max = min_t(unsigned int, hard_iface->net_dev->mtu, BATADV_MAX_AGGREGATION_BYTES); unsigned int ogm_len = batadv_v_ogm_len(skb); lockdep_assert_held(&hard_iface->bat_v.aggr_list.lock); return hard_iface->bat_v.aggr_len + ogm_len <= max; } /** * batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue * @hard_iface: the interface holding the aggregation queue * * Empties the OGMv2 aggregation queue and frees all the skbs it contains. * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. */ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) { lockdep_assert_held(&hard_iface->bat_v.aggr_list.lock); __skb_queue_purge(&hard_iface->bat_v.aggr_list); hard_iface->bat_v.aggr_len = 0; } /** * batadv_v_ogm_aggr_send() - flush & send aggregation queue * @hard_iface: the interface with the aggregation queue to flush * * Aggregates all OGMv2 packets currently in the aggregation queue into a * single OGMv2 packet and transmits this aggregate. * * The aggregation queue is empty after this call. * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. */ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) { unsigned int aggr_len = hard_iface->bat_v.aggr_len; struct sk_buff *skb_aggr; unsigned int ogm_len; struct sk_buff *skb; lockdep_assert_held(&hard_iface->bat_v.aggr_list.lock); if (!aggr_len) return; skb_aggr = dev_alloc_skb(aggr_len + ETH_HLEN + NET_IP_ALIGN); if (!skb_aggr) { batadv_v_ogm_aggr_list_free(hard_iface); return; } skb_reserve(skb_aggr, ETH_HLEN + NET_IP_ALIGN); skb_reset_network_header(skb_aggr); while ((skb = __skb_dequeue(&hard_iface->bat_v.aggr_list))) { hard_iface->bat_v.aggr_len -= batadv_v_ogm_len(skb); ogm_len = batadv_v_ogm_len(skb); skb_put_data(skb_aggr, skb->data, ogm_len); consume_skb(skb); } batadv_v_ogm_send_to_if(skb_aggr, hard_iface); } /** * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface * @skb: the OGM to queue * @hard_iface: the interface to queue the OGM on */ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); if (!atomic_read(&bat_priv->aggregated_ogms)) { batadv_v_ogm_send_to_if(skb, hard_iface); return; } spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); if (!batadv_v_ogm_queue_left(skb, hard_iface)) batadv_v_ogm_aggr_send(hard_iface); hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); __skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); } /** * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM * @bat_priv: the bat priv with all the soft interface information */ static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; int ogm_buff_len; u16 tvlv_len = 0; int ret; lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; ogm_buff = bat_priv->bat_v.ogm_buff; ogm_buff_len = bat_priv->bat_v.ogm_buff_len; /* tt changes have to be committed before the tvlv data is * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff, &ogm_buff_len, BATADV_OGM2_HLEN); 
bat_priv->bat_v.ogm_buff = ogm_buff; bat_priv->bat_v.ogm_buff_len = ogm_buff_len; skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len); if (!skb) goto reschedule; skb_reserve(skb, ETH_HLEN); skb_put_data(skb, ogm_buff, ogm_buff_len); ogm_packet = (struct batadv_ogm2_packet *)skb->data; ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno)); atomic_inc(&bat_priv->bat_v.ogm_seqno); ogm_packet->tvlv_len = htons(tvlv_len); /* broadcast on every interface */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) continue; ret = batadv_hardif_no_broadcast(hard_iface, NULL, NULL); if (ret) { char *type; switch (ret) { case BATADV_HARDIF_BCAST_NORECIPIENT: type = "no neighbor"; break; case BATADV_HARDIF_BCAST_DUPFWD: type = "single neighbor is source"; break; case BATADV_HARDIF_BCAST_DUPORIG: type = "single neighbor is originator"; break; default: type = "unknown"; } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n", hard_iface->net_dev->name, type); batadv_hardif_put(hard_iface); continue; } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n", ogm_packet->orig, ntohl(ogm_packet->seqno), ntohl(ogm_packet->throughput), ogm_packet->ttl, hard_iface->net_dev->name, hard_iface->net_dev->dev_addr); /* this skb gets consumed by batadv_v_ogm_send_to_if() */ skb_tmp = skb_clone(skb, GFP_ATOMIC); if (!skb_tmp) { batadv_hardif_put(hard_iface); break; } batadv_v_ogm_queue_on_if(skb_tmp, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); consume_skb(skb); reschedule: batadv_v_ogm_start_timer(bat_priv); out: return; } /** * batadv_v_ogm_send() - periodic worker broadcasting the own OGM * @work: work queue item */ static void batadv_v_ogm_send(struct work_struct *work) { struct batadv_priv_bat_v *bat_v; struct batadv_priv *bat_priv; bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); bat_priv = container_of(bat_v, struct batadv_priv, bat_v); mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); batadv_v_ogm_send_softif(bat_priv); mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface * @work: work queue item * * Emits aggregated OGM messages in regular intervals. */ void batadv_v_ogm_aggr_work(struct work_struct *work) { struct batadv_hard_iface_bat_v *batv; struct batadv_hard_iface *hard_iface; batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work); hard_iface = container_of(batv, struct batadv_hard_iface, bat_v); spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); batadv_v_ogm_aggr_send(hard_iface); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); batadv_v_ogm_start_queue_timer(hard_iface); } /** * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare * * Takes care of scheduling its own OGM sending routine for this interface. 
* * Return: 0 on success or a negative error code otherwise */ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); batadv_v_ogm_start_queue_timer(hard_iface); batadv_v_ogm_start_timer(bat_priv); return 0; } /** * batadv_v_ogm_iface_disable() - release OGM interface private resources * @hard_iface: interface for which the resources have to be released */ void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq); spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); batadv_v_ogm_aggr_list_free(hard_iface); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); } /** * batadv_v_ogm_primary_iface_set() - set a new primary interface * @primary_iface: the new primary interface */ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) { struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); unlock: mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** * batadv_v_forward_penalty() - apply a penalty to the throughput metric * forwarded with B.A.T.M.A.N. V OGMs * @bat_priv: the bat priv with all the soft interface information * @if_incoming: the interface where the OGM has been received * @if_outgoing: the interface where the OGM has to be forwarded to * @throughput: the current throughput * * Apply a penalty on the current throughput metric value based on the * characteristic of the interface where the OGM has been received. * * Initially the per hardif hop penalty is applied to the throughput. After * that the return value is then computed as follows: * - throughput * 50% if the incoming and outgoing interface are the * same WiFi interface and the throughput is above * 1MBit/s * - throughput if the outgoing interface is the default * interface (i.e. this OGM is processed for the * internal table and not forwarded) * - throughput * node hop penalty otherwise * * Return: the penalised throughput metric. */ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, u32 throughput) { int if_hop_penalty = atomic_read(&if_incoming->hop_penalty); int hop_penalty = atomic_read(&bat_priv->hop_penalty); int hop_penalty_max = BATADV_TQ_MAX_VALUE; /* Apply per hardif hop penalty */ throughput = throughput * (hop_penalty_max - if_hop_penalty) / hop_penalty_max; /* Don't apply hop penalty in default originator table. */ if (if_outgoing == BATADV_IF_DEFAULT) return throughput; /* Forwarding on the same WiFi interface cuts the throughput in half * due to the store & forward characteristics of WIFI. * Very low throughput values are the exception. 
*/ if (throughput > 10 && if_incoming == if_outgoing && !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX)) return throughput / 2; /* hop penalty of 255 equals 100% */ return throughput * (hop_penalty_max - hop_penalty) / hop_penalty_max; } /** * batadv_v_ogm_forward() - check conditions and forward an OGM to the given * outgoing interface * @bat_priv: the bat priv with all the soft interface information * @ogm_received: previously received OGM to be forwarded * @orig_node: the originator which has been updated * @neigh_node: the neigh_node through with the OGM has been received * @if_incoming: the interface on which this OGM was received on * @if_outgoing: the interface to which the OGM has to be forwarded to * * Forward an OGM to an interface after having altered the throughput metric and * the TTL value contained in it. The original OGM isn't modified. */ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, const struct batadv_ogm2_packet *ogm_received, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; struct batadv_orig_ifinfo *orig_ifinfo = NULL; struct batadv_neigh_node *router = NULL; struct batadv_ogm2_packet *ogm_forward; unsigned char *skb_buff; struct sk_buff *skb; size_t packet_len; u16 tvlv_len; /* only forward for specific interfaces, not for the default one. */ if (if_outgoing == BATADV_IF_DEFAULT) goto out; orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); if (!orig_ifinfo) goto out; /* acquire possibly updated router */ router = batadv_orig_router_get(orig_node, if_outgoing); /* strict rule: forward packets coming from the best next hop only */ if (neigh_node != router) goto out; /* don't forward the same seqno twice on one interface */ if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm_received->seqno)) goto out; orig_ifinfo->last_seqno_forwarded = ntohl(ogm_received->seqno); if (ogm_received->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); goto out; } neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); if (!neigh_ifinfo) goto out; tvlv_len = ntohs(ogm_received->tvlv_len); packet_len = BATADV_OGM2_HLEN + tvlv_len; skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev, ETH_HLEN + packet_len); if (!skb) goto out; skb_reserve(skb, ETH_HLEN); skb_buff = skb_put_data(skb, ogm_received, packet_len); /* apply forward penalty */ ogm_forward = (struct batadv_ogm2_packet *)skb_buff; ogm_forward->throughput = htonl(neigh_ifinfo->bat_v.throughput); ogm_forward->ttl--; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n", if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), ogm_forward->ttl, if_incoming->net_dev->name); batadv_v_ogm_queue_on_if(skb, if_outgoing); out: batadv_orig_ifinfo_put(orig_ifinfo); batadv_neigh_node_put(router); batadv_neigh_ifinfo_put(neigh_ifinfo); } /** * batadv_v_ogm_metric_update() - update route metric based on OGM * @bat_priv: the bat priv with all the soft interface information * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received * @neigh_node: the neigh_node through with the OGM has been received * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered * * Return: * 1 if the OGM is new, * 0 if it is not new but valid, * <0 on error (e.g. 
old OGM) */ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv, const struct batadv_ogm2_packet *ogm2, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; bool protection_started = false; int ret = -EINVAL; u32 path_throughput; s32 seq_diff; orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); if (!orig_ifinfo) goto out; seq_diff = ntohl(ogm2->seqno) - orig_ifinfo->last_real_seqno; if (!hlist_empty(&orig_node->neigh_list) && batadv_window_protected(bat_priv, seq_diff, BATADV_OGM_MAX_AGE, &orig_ifinfo->batman_seqno_reset, &protection_started)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: packet within window protection time from %pM\n", ogm2->orig); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Last reset: %ld, %ld\n", orig_ifinfo->batman_seqno_reset, jiffies); goto out; } /* drop packets with old seqnos, however accept the first packet after * a host has been rebooted. */ if (seq_diff < 0 && !protection_started) goto out; neigh_node->last_seen = jiffies; orig_node->last_seen = jiffies; orig_ifinfo->last_real_seqno = ntohl(ogm2->seqno); orig_ifinfo->last_ttl = ogm2->ttl; neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); if (!neigh_ifinfo) goto out; path_throughput = batadv_v_forward_penalty(bat_priv, if_incoming, if_outgoing, ntohl(ogm2->throughput)); neigh_ifinfo->bat_v.throughput = path_throughput; neigh_ifinfo->bat_v.last_seqno = ntohl(ogm2->seqno); neigh_ifinfo->last_ttl = ogm2->ttl; if (seq_diff > 0 || protection_started) ret = 1; else ret = 0; out: batadv_orig_ifinfo_put(orig_ifinfo); batadv_neigh_ifinfo_put(neigh_ifinfo); return ret; } /** * batadv_v_ogm_route_update() - update routes based on OGM * @bat_priv: the bat priv with all the soft interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received * @neigh_node: the neigh_node through with the OGM has been received * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered * * Return: true if the packet should be forwarded, false otherwise */ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, const struct ethhdr *ethhdr, const struct batadv_ogm2_packet *ogm2, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_neigh_node; struct batadv_neigh_node *orig_neigh_router = NULL; struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL; u32 router_throughput, neigh_throughput; u32 router_last_seqno; u32 neigh_last_seqno; s32 neigh_seq_diff; bool forward = false; orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source); if (!orig_neigh_node) goto out; orig_neigh_router = batadv_orig_router_get(orig_neigh_node, if_outgoing); /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ router = batadv_orig_router_get(orig_node, if_outgoing); if (router && router->orig_node != orig_node && !orig_neigh_router) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); goto out; } /* Mark the OGM to be considered for forwarding, and update routes * if needed. 
*/ forward = true; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Searching and updating originator entry of received packet\n"); /* if this neighbor already is our next hop there is nothing * to change */ if (router == neigh_node) goto out; /* don't consider neighbours with worse throughput. * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than * the last received seqno from our best next hop. */ if (router) { router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); /* if these are not allocated, something is wrong. */ if (!router_ifinfo || !neigh_ifinfo) goto out; neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno; router_last_seqno = router_ifinfo->bat_v.last_seqno; neigh_seq_diff = neigh_last_seqno - router_last_seqno; router_throughput = router_ifinfo->bat_v.throughput; neigh_throughput = neigh_ifinfo->bat_v.throughput; if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF && router_throughput >= neigh_throughput) goto out; } batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); out: batadv_neigh_node_put(router); batadv_neigh_node_put(orig_neigh_router); batadv_orig_node_put(orig_neigh_node); batadv_neigh_ifinfo_put(router_ifinfo); batadv_neigh_ifinfo_put(neigh_ifinfo); return forward; } /** * batadv_v_ogm_process_per_outif() - process a batman v OGM for an outgoing if * @bat_priv: the bat priv with all the soft interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received * @neigh_node: the neigh_node through with the OGM has been received * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered */ static void batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, const struct ethhdr *ethhdr, const struct batadv_ogm2_packet *ogm2, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { int seqno_age; bool forward; /* first, update the metric with according sanity checks */ seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node, neigh_node, if_incoming, if_outgoing); /* outdated sequence numbers are to be discarded */ if (seqno_age < 0) return; /* only unknown & newer OGMs contain TVLVs we are interested in */ if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT) batadv_tvlv_containers_process(bat_priv, BATADV_OGM2, orig_node, NULL, (unsigned char *)(ogm2 + 1), ntohs(ogm2->tvlv_len)); /* if the metric update went through, update routes if needed */ forward = batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node, neigh_node, if_incoming, if_outgoing); /* if the routes have been processed correctly, check and forward */ if (forward) batadv_v_ogm_forward(bat_priv, ogm2, orig_node, neigh_node, if_incoming, if_outgoing); } /** * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated * @buff_pos: current position in the skb * @packet_len: total length of the skb * @ogm2_packet: potential OGM2 in buffer * * Return: true if there is enough space for another OGM, false otherwise. 
*/ static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, const struct batadv_ogm2_packet *ogm2_packet) { int next_buff_pos = 0; /* check if there is enough space for the header */ next_buff_pos += buff_pos + sizeof(*ogm2_packet); if (next_buff_pos > packet_len) return false; /* check if there is enough space for the optional TVLV */ next_buff_pos += ntohs(ogm2_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); } /** * batadv_v_ogm_process() - process an incoming batman v OGM * @skb: the skb containing the OGM * @ogm_offset: offset to the OGM which should be processed (for aggregates) * @if_incoming: the interface where this packet was received */ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct ethhdr *ethhdr; struct batadv_orig_node *orig_node = NULL; struct batadv_hardif_neigh_node *hardif_neigh = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; u32 ogm_throughput, link_throughput, path_throughput; int ret; ethhdr = eth_hdr(skb); ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset); ogm_throughput = ntohl(ogm_packet->throughput); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, ogm_packet->orig, ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl, ogm_packet->version, ntohs(ogm_packet->tvlv_len)); if (batadv_is_my_mac(bat_priv, ogm_packet->orig)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: originator packet from ourself\n"); return; } /* If the throughput metric is 0, immediately drop the packet. No need * to create orig_node / neigh_node for an unusable route. */ if (ogm_throughput == 0) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: originator packet with throughput metric of 0\n"); return; } /* require ELP packets be to received from this neighbor first */ hardif_neigh = batadv_hardif_neigh_get(if_incoming, ethhdr->h_source); if (!hardif_neigh) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); goto out; } orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig); if (!orig_node) goto out; neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming, ethhdr->h_source); if (!neigh_node) goto out; /* Update the received throughput metric to match the link * characteristic: * - If this OGM traveled one hop so far (emitted by single hop * neighbor) the path throughput metric equals the link throughput. * - For OGMs traversing more than hop the path throughput metric is * the smaller of the path throughput and the link throughput. 
*/ link_throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); path_throughput = min_t(u32, link_throughput, ogm_throughput); ogm_packet->throughput = htonl(path_throughput); batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node, neigh_node, if_incoming, BATADV_IF_DEFAULT); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) continue; ret = batadv_hardif_no_broadcast(hard_iface, ogm_packet->orig, hardif_neigh->orig); if (ret) { char *type; switch (ret) { case BATADV_HARDIF_BCAST_NORECIPIENT: type = "no neighbor"; break; case BATADV_HARDIF_BCAST_DUPFWD: type = "single neighbor is source"; break; case BATADV_HARDIF_BCAST_DUPORIG: type = "single neighbor is originator"; break; default: type = "unknown"; } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n", ogm_packet->orig, hard_iface->net_dev->name, type); batadv_hardif_put(hard_iface); continue; } batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node, neigh_node, if_incoming, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); out: batadv_orig_node_put(orig_node); batadv_neigh_node_put(neigh_node); batadv_hardif_neigh_put(hardif_neigh); } /** * batadv_v_ogm_packet_recv() - OGM2 receiving handler * @skb: the received OGM * @if_incoming: the interface where this OGM has been received * * Return: NET_RX_SUCCESS and consume the skb on success or returns NET_RX_DROP * (without freeing the skb) on failure */ int batadv_v_ogm_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_ogm2_packet *ogm_packet; struct ethhdr *ethhdr; int ogm_offset; u8 *packet_pos; int ret = NET_RX_DROP; /* did we receive a OGM2 packet on an interface that does not have * B.A.T.M.A.N. V enabled ? 
*/ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) goto free_skb; if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) goto free_skb; ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, skb->len + ETH_HLEN); ogm_offset = 0; ogm_packet = (struct batadv_ogm2_packet *)skb->data; while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb), ogm_packet)) { batadv_v_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM2_HLEN; ogm_offset += ntohs(ogm_packet->tvlv_len); packet_pos = skb->data + ogm_offset; ogm_packet = (struct batadv_ogm2_packet *)packet_pos; } ret = NET_RX_SUCCESS; free_skb: if (ret == NET_RX_SUCCESS) consume_skb(skb); else kfree_skb(skb); return ret; } /** * batadv_v_ogm_init() - initialise the OGM2 engine * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or a negative error code in case of failure */ int batadv_v_ogm_init(struct batadv_priv *bat_priv) { struct batadv_ogm2_packet *ogm_packet; unsigned char *ogm_buff; u32 random_seqno; bat_priv->bat_v.ogm_buff_len = BATADV_OGM2_HLEN; ogm_buff = kzalloc(bat_priv->bat_v.ogm_buff_len, GFP_ATOMIC); if (!ogm_buff) return -ENOMEM; bat_priv->bat_v.ogm_buff = ogm_buff; ogm_packet = (struct batadv_ogm2_packet *)ogm_buff; ogm_packet->packet_type = BATADV_OGM2; ogm_packet->version = BATADV_COMPAT_VERSION; ogm_packet->ttl = BATADV_TTL; ogm_packet->flags = BATADV_NO_FLAGS; ogm_packet->throughput = htonl(BATADV_THROUGHPUT_MAX_VALUE); /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); mutex_init(&bat_priv->bat_v.ogm_buff_mutex); return 0; } /** * batadv_v_ogm_free() - free OGM private resources * @bat_priv: the bat priv with all the soft interface information */ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); }
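The long comment above batadv_v_forward_penalty() describes the metric arithmetic in prose; the standalone sketch below (plain C, not kernel code; parameter names and the example values are assumptions) restates it so the three cases are easy to check: per-hardif penalty first, no further penalty for the default originator table, a 50% cut when the OGM would be re-sent on the same half-duplex (WiFi) interface, and the node hop penalty otherwise.

/*
 * Minimal restatement of the forward-penalty arithmetic, assuming
 * 255 (BATADV_TQ_MAX_VALUE) represents 100%. Hypothetical names.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_TQ_MAX 255u

static uint32_t demo_forward_penalty(uint32_t throughput,
				     uint32_t if_hop_penalty,
				     uint32_t node_hop_penalty,
				     bool same_half_duplex_iface,
				     bool default_table)
{
	/* the per-hardif hop penalty is always applied first */
	throughput = throughput * (DEMO_TQ_MAX - if_hop_penalty) / DEMO_TQ_MAX;

	/* the default originator table keeps this value unchanged */
	if (default_table)
		return throughput;

	/* same half-duplex radio in and out: halve, unless throughput is tiny */
	if (same_half_duplex_iface && throughput > 10)
		return throughput / 2;

	/* otherwise apply the node-wide hop penalty (255 == 100%) */
	return throughput * (DEMO_TQ_MAX - node_hop_penalty) / DEMO_TQ_MAX;
}

int main(void)
{
	/* throughput 1000 = 100 Mbit/s in the 100 kbit/s steps used here */
	printf("same radio:      %u\n",
	       demo_forward_penalty(1000, 0, 15, true, false));
	printf("different iface: %u\n",
	       demo_forward_penalty(1000, 0, 15, false, false));
	return 0;
}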
/* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses.
****************************************************************** */ #ifndef FSE_H #define FSE_H /*-***************************************** * Dependencies ******************************************/ #include "zstd_deps.h" /* size_t, ptrdiff_t */ /*-***************************************** * FSE_PUBLIC_API : control library symbols visibility ******************************************/ #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) # define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ # define FSE_PUBLIC_API __declspec(dllexport) #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else # define FSE_PUBLIC_API #endif /*------ Version ------*/ #define FSE_VERSION_MAJOR 0 #define FSE_VERSION_MINOR 9 #define FSE_VERSION_RELEASE 0 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE #define FSE_QUOTE(str) #str #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */ /*-**************************************** * FSE simple functions ******************************************/ /*! FSE_compress() : Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). @return : size of compressed data (<= dstCapacity). Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. if FSE_isError(return), compression failed (more details using FSE_getErrorName()) */ FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize); /*! FSE_decompress(): Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', into already allocated destination buffer 'dst', of size 'dstCapacity'. @return : size of regenerated data (<= maxDstSize), or an error code, which can be tested using FSE_isError() . ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! Why ? : making this distinction requires a header. Header management is intentionally delegated to the user layer, which can better manage special cases. */ FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize); /*-***************************************** * Tool functions ******************************************/ FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ /* Error Management */ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ /*-***************************************** * FSE advanced functions ******************************************/ /*! 
FSE_compress2() : Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' Both parameters can be defined as '0' to mean : use default value @return : size of compressed data Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. if FSE_isError(return), it's an error code. */ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); /*-***************************************** * FSE detailed API ******************************************/ /*! FSE_compress() does the following: 1. count symbol occurrence from source[] into table count[] (see hist.h) 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) 3. save normalized counters to memory buffer using writeNCount() 4. build encoding table 'CTable' from normalized counters 5. encode the data stream using encoding table 'CTable' FSE_decompress() does the following: 1. read normalized counters with readNCount() 2. build decoding table 'DTable' from normalized counters 3. decode the data stream using decoding table 'DTable' The following API allows targeting specific sub-functions for advanced tasks. For example, it's possible to compress several blocks using the same 'CTable', or to save and provide normalized distribution using external method. */ /* *** COMPRESSION *** */ /*! FSE_optimalTableLog(): dynamically downsize 'tableLog' when conditions are met. It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. @return : recommended tableLog (necessarily <= 'maxTableLog') */ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). useLowProbCount is a boolean parameter which trades off compressed size for faster header decoding. When it is set to 1, the compressed data will be slightly smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 is a good default, since header deserialization makes a big speed difference. Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. Typically useful for allocation purpose. */ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); /*! FSE_writeNCount(): Compactly save 'normalizedCounter' into 'buffer'. @return : size of the compressed table, or an errorCode, which can be tested using FSE_isError(). */ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! Constructor and Destructor of FSE_CTable. Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ typedef unsigned FSE_CTable; /* don't allocate that. 
It's only meant to be more restrictive than void* */ FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); /*! FSE_buildCTable(): Builds `ct`, which must be already allocated, using FSE_createCTable(). @return : 0, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! FSE_compress_usingCTable(): Compress `src` using `ct` into `dst` which must be already allocated. @return : size of compressed data (<= `dstCapacity`), or 0 if compressed data could not fit into `dst`, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); /*! Tutorial : ---------- The first step is to count all symbols. FSE_count() does this job very fast. Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) FSE_count() will return the number of occurrence of the most frequent symbol. This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). The next step is to normalize the frequencies. FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. It also guarantees a minimum of 1 to any Symbol with frequency >= 1. You can use 'tableLog'==0 to mean "use default tableLog value". If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). The result of FSE_normalizeCount() will be saved into a table, called 'normalizedCounter', which is a table of signed short. 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. The return value is tableLog if everything proceeded as expected. It is 0 if there is a single symbol within distribution. If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). 'buffer' must be already allocated. For guaranteed success, buffer size must be at least FSE_headerBound(). The result of the function is the number of bytes written into 'buffer'. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). 'normalizedCounter' can then be used to create the compression table 'CTable'. The space required by 'CTable' must be already allocated, using FSE_createCTable(). You can then use FSE_buildCTable() to fill 'CTable'. If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). 
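   Putting the compression steps above together (illustrative sequence only:
   error checks with FSE_isError() are omitted, 'src', 'dst', 'header' and their
   sizes are placeholder buffers, and FSE_count() is the counting helper the
   tutorial describes, declared outside this header):

       unsigned count[256];
       unsigned maxSymbolValue = 255;
       FSE_count(count, &maxSymbolValue, src, srcSize);
       unsigned const tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
       short norm[256];
       FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, 1);
       size_t const hSize = FSE_writeNCount(header, headerCapacity, norm, maxSymbolValue, tableLog);
       FSE_CTable* const ct = FSE_createCTable(maxSymbolValue, tableLog);
       FSE_buildCTable(ct, norm, maxSymbolValue, tableLog);
       size_t const cSize = FSE_compress_usingCTable(dst, dstCapacity, src, srcSize, ct);
       FSE_freeCTable(ct);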
Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. If it returns '0', compressed data could not fit into 'dst'. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). */ /* *** DECOMPRESSION *** */ /*! FSE_readNCount(): Read compactly saved 'normalizedCounter' from 'rBuffer'. @return : size read from 'rBuffer', or an errorCode, which can be tested using FSE_isError(). maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); /*! FSE_readNCount_bmi2(): * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. */ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize, int bmi2); /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); /*! FSE_buildDTable(): Builds 'dt', which must be already allocated, using FSE_createDTable(). return : 0, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! FSE_decompress_usingDTable(): Decompress compressed source `cSrc` of size `cSrcSize` using `dt` into `dst` which must be already allocated. @return : size of regenerated data (necessarily <= `dstCapacity`), or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); /*! Tutorial : ---------- (Note : these functions only decompress FSE-compressed blocks. If block is uncompressed, use memcpy() instead If block is a single repeated byte, use memset() instead ) The first step is to obtain the normalized frequencies of symbols. This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. In practice, that means it's necessary to know 'maxSymbolValue' beforehand, or size the table to handle worst case situations (typically 256). FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. If there is an error, the function will return an error code, which can be tested using FSE_isError(). The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. This is performed by the function FSE_buildDTable(). The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). If there is an error, the function will return an error code, which can be tested using FSE_isError(). `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). 
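   Putting the decompression steps above together (illustrative sequence only;
   error checks with FSE_isError() are omitted, and 'rBuffer', 'cSrc' and 'dst'
   are placeholder buffers):

       short norm[256];
       unsigned maxSymbolValue = 255;
       unsigned tableLog;
       size_t const hSize = FSE_readNCount(norm, &maxSymbolValue, &tableLog, rBuffer, rBuffSize);
       FSE_DTable* const dt = FSE_createDTable(tableLog);
       FSE_buildDTable(dt, norm, maxSymbolValue, tableLog);
       size_t const dSize = FSE_decompress_usingDTable(dst, dstCapacity, cSrc, cSrcSize, dt);   // cSrc starts hSize bytes into rBuffer
       FSE_freeDTable(dt);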
`cSrcSize` must be strictly correct, otherwise decompression will fail. FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) */ #endif /* FSE_H */ #if !defined(FSE_H_FSE_STATIC_LINKING_ONLY) #define FSE_H_FSE_STATIC_LINKING_ONLY /* *** Dependency *** */ #include "bitstream.h" /* ***************************************** * Static allocation *******************************************/ /* FSE buffer bounds */ #define FSE_NCOUNTBOUND 512 #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) /* ***************************************** * FSE advanced API ***************************************** */ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); /*< same as FSE_optimalTableLog(), which used `minus==2` */ /* FSE_compress_wksp() : * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. */ #define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); /*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /*< build a fake FSE_CTable, designed to compress always the same symbolValue */ /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. * See FSE_buildCTable_wksp() for breakdown of workspace usage. 
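 *
 * For instance (illustrative only, assuming maxSymbolValue == 255 and
 * tableLog == 12, with 'norm' previously filled by FSE_normalizeCount()):
 *
 *     FSE_CTable ct[FSE_CTABLE_SIZE_U32(12, 255)];
 *     unsigned wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(255, 12)];
 *     size_t const err = FSE_buildCTable_wksp(ct, norm, 255, 12, wksp, sizeof(wksp));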
*/ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */) #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); /*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /*< build a fake FSE_DTable, designed to always generate the same symbolValue */ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); /*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); /*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ typedef enum { FSE_repeat_none, /*< Cannot use the previous table */ FSE_repeat_check, /*< Can use the previous table but it must be checked */ FSE_repeat_valid /*< Can use the previous table and it is assumed to be valid */ } FSE_repeat; /* ***************************************** * FSE symbol compression API *******************************************/ /*! This API consists of small unitary functions, which highly benefit from being inlined. Hence their body are included in next section. */ typedef struct { ptrdiff_t value; const void* stateTable; const void* symbolTT; unsigned stateLog; } FSE_CState_t; static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); /*< These functions are inner components of FSE_compress_usingCTable(). They allow the creation of custom streams, mixing multiple tables and bit sources. A key property to keep in mind is that encoding and decoding are done **in reverse direction**. So the first symbol you will encode is the last you will decode, like a LIFO stack. 
You will need a few variables to track your CStream. They are : FSE_CTable ct; // Provided by FSE_buildCTable() BIT_CStream_t bitStream; // bitStream tracking structure FSE_CState_t state; // State tracking structure (can have several) The first thing to do is to init bitStream and state. size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); FSE_initCState(&state, ct); Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); You can then encode your input data, byte after byte. FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. Remember decoding will be done in reverse direction. FSE_encodeByte(&bitStream, &state, symbol); At any time, you can also add any bit sequence. Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders BIT_addBits(&bitStream, bitField, nbBits); The above methods don't commit data to memory, they just store it into local register, for speed. Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). Writing data to memory is a manual operation, performed by the flushBits function. BIT_flushBits(&bitStream); Your last FSE encoding operation shall be to flush your last state value(s). FSE_flushState(&bitStream, &state); Finally, you must close the bitStream. The function returns the size of CStream in bytes. If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) If there is an error, it returns an errorCode (which can be tested using FSE_isError()). size_t size = BIT_closeCStream(&bitStream); */ /* ***************************************** * FSE symbol decompression API *******************************************/ typedef struct { size_t state; const void* table; /* precise table may vary, depending on U16 */ } FSE_DState_t; static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); /*< Let's now decompose FSE_decompress_usingDTable() into its unitary components. You will decode FSE-encoded symbols from the bitStream, and also any other bitFields you put in, **in reverse order**. You will need a few variables to track your bitStream. They are : BIT_DStream_t DStream; // Stream context FSE_DState_t DState; // State context. Multiple ones are possible FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() The first thing to do is to init the bitStream. errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); You should then retrieve your initial state(s) (in reverse flushing order if you have several ones) : errorCode = FSE_initDState(&DState, &DStream, DTablePtr); You can then decode your data, symbol after symbol. For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) Note : maximum allowed nbBits is 25, for 32-bits compatibility size_t bitField = BIT_readBits(&DStream, nbBits); All above operations only read from local register (which size depends on size_t). Refueling the register from memory is manually performed by the reload method. 
endSignal = FSE_reloadDStream(&DStream); BIT_reloadDStream() result tells if there is still some more data to read from DStream. BIT_DStream_unfinished : there is still some data left into the DStream. BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, to properly detect the exact end of stream. After each decoded symbol, check if DStream is fully consumed using this simple test : BIT_reloadDStream(&DStream) >= BIT_DStream_completed When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. Checking if DStream has reached its end is performed by : BIT_endOfDStream(&DStream); Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. FSE_endOfDState(&DState); */ /* ***************************************** * FSE unsafe API *******************************************/ static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ /* ***************************************** * Implementation of inlined functions *******************************************/ typedef struct { int deltaFindState; U32 deltaNbBits; } FSE_symbolCompressionTransform; /* total 8 bytes */ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) { const void* ptr = ct; const U16* u16ptr = (const U16*) ptr; const U32 tableLog = MEM_read16(ptr); statePtr->value = (ptrdiff_t)1<<tableLog; statePtr->stateTable = u16ptr+2; statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); statePtr->stateLog = tableLog; } /*! FSE_initCState2() : * Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) * uses the smallest state value possible, saving the cost of this symbol */ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) { FSE_initCState(statePtr, ct); { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; const U16* stateTable = (const U16*)(statePtr->stateTable); U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; } } MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) { FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; const U16* const stateTable = (const U16*)(statePtr->stateTable); U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); BIT_addBits(bitC, statePtr->value, nbBitsOut); statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; } MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) { BIT_addBits(bitC, statePtr->value, statePtr->stateLog); BIT_flushBits(bitC); } /* FSE_getMaxNbBits() : * Approximate maximum cost of a symbol, in bits. 
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) * note 1 : assume symbolValue is valid (<= maxSymbolValue) * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) { const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; } /* FSE_bitCost() : * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) * note 1 : assume symbolValue is valid (<= maxSymbolValue) * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) { const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; U32 const threshold = (minNbBits+1) << 16; assert(tableLog < 16); assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ { U32 const tableSize = 1 << tableLog; U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ U32 const bitMultiplier = 1 << accuracyLog; assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); assert(normalizedDeltaFromThreshold <= bitMultiplier); return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; } } /* ====== Decompression ====== */ typedef struct { U16 tableLog; U16 fastMode; } FSE_DTableHeader; /* sizeof U32 */ typedef struct { unsigned short newState; unsigned char symbol; unsigned char nbBits; } FSE_decode_t; /* size == U32 */ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) { const void* ptr = dt; const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); BIT_reloadDStream(bitD); DStatePtr->table = dt + 1; } MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) { FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; return DInfo.symbol; } MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; U32 const nbBits = DInfo.nbBits; size_t const lowBits = BIT_readBits(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; } MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; U32 const nbBits = DInfo.nbBits; BYTE const symbol = DInfo.symbol; size_t const lowBits = BIT_readBits(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; return symbol; } /*! 
FSE_decodeSymbolFast() : unsafe, only works if no symbol has a probability > 50% */ MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; U32 const nbBits = DInfo.nbBits; BYTE const symbol = DInfo.symbol; size_t const lowBits = BIT_readBitsFast(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; return symbol; } MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) { return DStatePtr->state == 0; } #ifndef FSE_COMMONDEFS_ONLY /* ************************************************************** * Tuning parameters ****************************************************************/ /*!MEMORY_USAGE : * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) * Increasing memory usage improves compression ratio * Reduced memory usage can improve speed, due to cache effect * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ #ifndef FSE_MAX_MEMORY_USAGE # define FSE_MAX_MEMORY_USAGE 14 #endif #ifndef FSE_DEFAULT_MEMORY_USAGE # define FSE_DEFAULT_MEMORY_USAGE 13 #endif #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" #endif /*!FSE_MAX_SYMBOL_VALUE : * Maximum symbol value authorized. * Required for proper stack allocation */ #ifndef FSE_MAX_SYMBOL_VALUE # define FSE_MAX_SYMBOL_VALUE 255 #endif /* ************************************************************** * template functions type & suffix ****************************************************************/ #define FSE_FUNCTION_TYPE BYTE #define FSE_FUNCTION_EXTENSION #define FSE_DECODE_TYPE FSE_decode_t #endif /* !FSE_COMMONDEFS_ONLY */ /* *************************************************************** * Constants *****************************************************************/ #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG) #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1) #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2) #define FSE_MIN_TABLELOG 5 #define FSE_TABLELOG_ABSOLUTE_MAX 15 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" #endif #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) #endif /* FSE_STATIC_LINKING_ONLY */
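The detailed API above composes into a short pipeline: histogram, normalize, serialize the counters, build a CTable, then encode. The function below is an illustrative user-space sketch built only on the prototypes above, not kernel code: it assumes the fse.h declarations are visible, counts bytes with a plain loop instead of the hist.h helpers, keeps an FSE_MAX_TABLELOG-sized CTable on the stack (roughly 10 KB; real kernel users would prefer the *_wksp variants with an external workspace), and collapses every failure into a 0 return.

#include <stddef.h>     /* size_t; the FSE prototypes above are assumed in scope */

static size_t fse_compress_sketch(void *dst, size_t dstCapacity,
                                  void *hdr, size_t hdrCapacity,
                                  const unsigned char *src, size_t srcSize)
{
        unsigned count[256] = { 0 };
        short norm[256];
        unsigned maxSymbolValue = 255;
        unsigned tableLog;
        size_t nl, hSize, i;
        FSE_CTable ct[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, 255)];

        /* 1. histogram of the input (stand-in for the hist.h counting helpers) */
        for (i = 0; i < srcSize; i++)
                count[src[i]]++;

        /* 2. choose a table size and normalize counts to sum to 2^tableLog */
        tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
        nl = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, 1);
        if (FSE_isError(nl))
                return 0;
        tableLog = (unsigned)nl;

        /* 3. serialize the normalized counters (the "NCount" header) into hdr */
        hSize = FSE_writeNCount(hdr, hdrCapacity, norm, maxSymbolValue, tableLog);
        if (FSE_isError(hSize))
                return 0;

        /* 4. + 5. build the encoding table and compress the payload with it;
         * a result of 0 means the data did not fit into dst.
         */
        if (FSE_isError(FSE_buildCTable(ct, norm, maxSymbolValue, tableLog)))
                return 0;
        return FSE_compress_usingCTable(dst, dstCapacity, src, srcSize, ct);
}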
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
*/ #include <asm/unaligned.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <linux/dccp.h> #include <linux/sctp.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/tcp.h> struct nft_exthdr { u8 type; u8 offset; u8 len; u8 op; u8 dreg; u8 sreg; u8 flags; }; static unsigned int optlen(const u8 *opt, unsigned int offset) { /* Beware zero-length options: make finite progress */ if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0) return 1; else return opt[offset + 1]; } static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len) { if (len % NFT_REG32_SIZE) dest[len / NFT_REG32_SIZE] = 0; return skb_copy_bits(skb, offset, dest, len); } static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = &regs->data[priv->dreg]; unsigned int offset = 0; int err; if (pkt->skb->protocol != htons(ETH_P_IPV6)) goto err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, err >= 0); return; } else if (err < 0) { goto err; } offset += priv->offset; if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: regs->verdict.code = NFT_BREAK; } /* find the offset to specified option. * * If target header is found, its offset is set in *offset and return option * number. Otherwise, return negative error. * * If the first fragment doesn't contain the End of Options it is considered * invalid. */ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned int *offset, int target) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; unsigned int start; bool found = false; __be32 info; int optlen; iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) return -ENOENT; memset(opt, 0, sizeof(struct ip_options)); /* Copy the options since __ip_options_compile() modifies * the options. */ if (skb_copy_bits(skb, start, opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; if (__ip_options_compile(net, opt, NULL, &info)) return -EBADMSG; switch (target) { case IPOPT_SSRR: case IPOPT_LSRR: if (!opt->srr) break; found = target == IPOPT_SSRR ? opt->is_strictroute : !opt->is_strictroute; if (found) *offset = opt->srr + start; break; case IPOPT_RR: if (!opt->rr) break; *offset = opt->rr + start; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; *offset = opt->router_alert + start; found = true; break; default: return -EOPNOTSUPP; } return found ? 
target : -ENOENT; } static void nft_exthdr_ipv4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = &regs->data[priv->dreg]; struct sk_buff *skb = pkt->skb; unsigned int offset; int err; if (skb->protocol != htons(ETH_P_IP)) goto err; err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type); if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, err >= 0); return; } else if (err < 0) { goto err; } offset += priv->offset; if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: regs->verdict.code = NFT_BREAK; } static void * nft_tcp_header_pointer(const struct nft_pktinfo *pkt, unsigned int len, void *buffer, unsigned int *tcphdr_len) { struct tcphdr *tcph; if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); if (!tcph) return NULL; *tcphdr_len = __tcp_hdrlen(tcph); if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len) return NULL; return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer); } static void nft_exthdr_tcp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int i, optl, tcphdr_len, offset; u32 *dest = &regs->data[priv->dreg]; struct tcphdr *tcph; u8 *opt; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { optl = optlen(opt, i); if (priv->type != opt[i]) continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) goto err; offset = i + priv->offset; if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, 1); } else { if (priv->len % NFT_REG32_SIZE) dest[priv->len / NFT_REG32_SIZE] = 0; memcpy(dest, opt + offset, priv->len); } return; } err: if (priv->flags & NFT_EXTHDR_F_PRESENT) *dest = 0; else regs->verdict.code = NFT_BREAK; } static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int i, optl, tcphdr_len, offset; struct tcphdr *tcph; u8 *opt; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len)) goto err; tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt)); opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { union { __be16 v16; __be32 v32; } old, new; optl = optlen(opt, i); if (priv->type != opt[i]) continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) goto err; offset = i + priv->offset; switch (priv->len) { case 2: old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset)); new.v16 = (__force __be16)nft_reg_load16( &regs->data[priv->sreg]); switch (priv->type) { case TCPOPT_MSS: /* increase can cause connection to stall */ if (ntohs(old.v16) <= ntohs(new.v16)) return; break; } if (old.v16 == new.v16) return; put_unaligned(new.v16, (__be16*)(opt + offset)); inet_proto_csum_replace2(&tcph->check, pkt->skb, old.v16, new.v16, false); break; case 4: new.v32 = nft_reg_load_be32(&regs->data[priv->sreg]); old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset)); if (old.v32 == new.v32) return; put_unaligned(new.v32, (__be32*)(opt + offset)); 
inet_proto_csum_replace4(&tcph->check, pkt->skb, old.v32, new.v32, false); break; default: WARN_ON_ONCE(1); break; } return; } return; err: regs->verdict.code = NFT_BREAK; } static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int i, tcphdr_len, optl; struct tcphdr *tcph; u8 *opt; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len)) goto drop; tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt)); opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { unsigned int j; optl = optlen(opt, i); if (priv->type != opt[i]) continue; if (i + optl > tcphdr_len) goto drop; for (j = 0; j < optl; ++j) { u16 n = TCPOPT_NOP; u16 o = opt[i+j]; if ((i + j) % 2 == 0) { o <<= 8; n <<= 8; } inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o), htons(n), false); } memset(opt + i, TCPOPT_NOP, optl); return; } /* option not found, continue. This allows to do multiple * option removals per rule. */ return; err: regs->verdict.code = NFT_BREAK; return; drop: /* can't remove, no choice but to drop */ regs->verdict.code = NF_DROP; } static void nft_exthdr_sctp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr); struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = &regs->data[priv->dreg]; const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; if (pkt->tprot != IPPROTO_SCTP) goto err; do { sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch); if (!sch || !sch->length) break; if (sch->type == priv->type) { if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, true); return; } if (priv->offset + priv->len > ntohs(sch->length) || offset + ntohs(sch->length) > pkt->skb->len) break; if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset, dest, priv->len) < 0) break; return; } offset += SCTP_PAD4(ntohs(sch->length)); } while (offset < pkt->skb->len); err: if (priv->flags & NFT_EXTHDR_F_PRESENT) nft_reg_store8(dest, false); else regs->verdict.code = NFT_BREAK; } static void nft_exthdr_dccp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int thoff, dataoff, optoff, optlen, i; u32 *dest = &regs->data[priv->dreg]; const struct dccp_hdr *dh; struct dccp_hdr _dh; if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff) goto err; thoff = nft_thoff(pkt); dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh); if (!dh) goto err; dataoff = dh->dccph_doff * sizeof(u32); optoff = __dccp_hdr_len(dh); if (dataoff <= optoff) goto err; optlen = dataoff - optoff; for (i = 0; i < optlen; ) { /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in * the length; the remaining options are at least 2B long. In * all cases, the first byte contains the option type. In * multi-byte options, the second byte contains the option * length, which must be at least two: 1 for the type plus 1 for * the length plus 0-253 for any following option data. We * aren't interested in the option data, only the type and the * length, so we don't need to read more than two bytes at a * time. 
*/ unsigned int buflen = optlen - i; u8 buf[2], *bufp; u8 type, len; if (buflen > sizeof(buf)) buflen = sizeof(buf); bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen, &buf); if (!bufp) goto err; type = bufp[0]; if (type == priv->type) { nft_reg_store8(dest, 1); return; } if (type <= DCCPO_MAX_RESERVED) { i++; continue; } if (buflen < 2) goto err; len = bufp[1]; if (len < 2) goto err; i += len; } err: *dest = 0; } static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, }; static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; int err; if (!tb[NFTA_EXTHDR_DREG] || !tb[NFTA_EXTHDR_TYPE] || !tb[NFTA_EXTHDR_OFFSET] || !tb[NFTA_EXTHDR_LEN]) return -EINVAL; err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); if (err < 0) return err; err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); if (err < 0) return err; if (tb[NFTA_EXTHDR_FLAGS]) { err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags); if (err < 0) return err; if (flags & ~NFT_EXTHDR_F_PRESENT) return -EINVAL; } if (tb[NFTA_EXTHDR_OP]) { err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); if (err < 0) return err; } priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; priv->flags = flags; priv->op = op; return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, priv->len); } static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; int err; if (!tb[NFTA_EXTHDR_SREG] || !tb[NFTA_EXTHDR_TYPE] || !tb[NFTA_EXTHDR_OFFSET] || !tb[NFTA_EXTHDR_LEN]) return -EINVAL; if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS]) return -EINVAL; err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); if (err < 0) return err; err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); if (err < 0) return err; if (offset < 2) return -EOPNOTSUPP; switch (len) { case 2: break; case 4: break; default: return -EOPNOTSUPP; } err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); if (err < 0) return err; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; priv->flags = flags; priv->op = op; return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, priv->len); } static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); if (tb[NFTA_EXTHDR_SREG] || tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS] || tb[NFTA_EXTHDR_OFFSET] || tb[NFTA_EXTHDR_LEN]) return -EINVAL; if (!tb[NFTA_EXTHDR_TYPE]) return -EINVAL; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->op = NFT_EXTHDR_OP_TCPOPT; return 0; } static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); int err = nft_exthdr_init(ctx, expr, tb); if (err < 0) return 
err; switch (priv->type) { case IPOPT_SSRR: case IPOPT_LSRR: case IPOPT_RR: case IPOPT_RA: break; default: return -EOPNOTSUPP; } return 0; } static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); int err = nft_exthdr_init(ctx, expr, tb); if (err < 0) return err; if (!(priv->flags & NFT_EXTHDR_F_PRESENT)) return -EOPNOTSUPP; return 0; } static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) return -1; return nft_exthdr_dump_common(skb, priv); } static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg)) return -1; return nft_exthdr_dump_common(skb, priv); } static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); return nft_exthdr_dump_common(skb, priv); } static bool nft_exthdr_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_exthdr *priv = nft_expr_priv(expr); const struct nft_exthdr *exthdr; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } exthdr = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->type != exthdr->type || priv->op != exthdr->op || priv->flags != exthdr->flags || priv->offset != exthdr->offset || priv->len != exthdr->len) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_ipv4_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv4_eval, .init = nft_exthdr_ipv4_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_tcp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_tcp_set_eval, .init = nft_exthdr_tcp_set_init, .dump = nft_exthdr_dump_set, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = 
nft_exthdr_tcp_strip_eval, .init = nft_exthdr_tcp_strip_init, .dump = nft_exthdr_dump_strip, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_sctp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_sctp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_dccp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_dccp_eval, .init = nft_exthdr_dccp_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { u32 op; if (!tb[NFTA_EXTHDR_OP]) return &nft_exthdr_ipv6_ops; if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG]) return ERR_PTR(-EOPNOTSUPP); op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP])); switch (op) { case NFT_EXTHDR_OP_TCPOPT: if (tb[NFTA_EXTHDR_SREG]) return &nft_exthdr_tcp_set_ops; if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_tcp_ops; return &nft_exthdr_tcp_strip_ops; case NFT_EXTHDR_OP_IPV6: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_ipv6_ops; break; case NFT_EXTHDR_OP_IPV4: if (ctx->family != NFPROTO_IPV6) { if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_ipv4_ops; } break; case NFT_EXTHDR_OP_SCTP: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_sctp_ops; break; case NFT_EXTHDR_OP_DCCP: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_dccp_ops; break; } return ERR_PTR(-EOPNOTSUPP); } struct nft_expr_type nft_exthdr_type __read_mostly = { .name = "exthdr", .select_ops = nft_exthdr_select_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, .owner = THIS_MODULE, };
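The TCP option paths above all rely on the same traversal convention encoded in optlen(): EOL and NOP are single-byte options, every other option carries a length byte of at least 2, and a bogus length must never stall the walk. The helper below is a stand-alone, user-space sketch of that traversal; the names and constants are local to the example rather than taken from the kernel sources.

#include <stddef.h>
#include <stdint.h>

#define SKETCH_TCPOPT_EOL 0
#define SKETCH_TCPOPT_NOP 1

/* Return the offset of option 'type' inside 'opt' (an option area of
 * 'optlen_total' bytes, starting just past the fixed TCP header), or -1 if
 * the option is not present or the option list is malformed.
 */
static int find_tcp_option(const uint8_t *opt, size_t optlen_total, uint8_t type)
{
        size_t i = 0;

        while (i < optlen_total) {
                uint8_t kind = opt[i];
                size_t len;

                if (kind == type)
                        return (int)i;
                if (kind == SKETCH_TCPOPT_EOL)
                        break;                  /* end of option list */
                if (kind == SKETCH_TCPOPT_NOP) {
                        i++;                    /* single-byte padding option */
                        continue;
                }
                if (i + 1 >= optlen_total)
                        break;                  /* no room for a length byte */
                len = opt[i + 1];
                if (len < 2 || i + len > optlen_total)
                        break;                  /* malformed: avoid an endless loop */
                i += len;
        }
        return -1;
}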
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

/* Kernel module implementing an IP set type: the hash:ip,port,ip type */

#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>

#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
/* 4 Forceadd support added */
/* 5 skbinfo support added */
#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");

/* Type specific function prefix */
#define HTYPE hash_ipportip

/* IPv4 variant */

/* Member elements */
struct hash_ipportip4_elem {
        __be32 ip;
        __be32 ip2;
        __be16 port;
        u8 proto;
        u8 padding;
};

static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
                          const struct hash_ipportip4_elem *ip2,
                          u32 *multi)
{
        return ip1->ip == ip2->ip &&
               ip1->ip2 == ip2->ip2 &&
               ip1->port == ip2->port &&
               ip1->proto == ip2->proto;
}

static bool
hash_ipportip4_data_list(struct sk_buff *skb,
                         const struct hash_ipportip4_elem *data)
{
        if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
            nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
            nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
            nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
                goto nla_put_failure;
        return false;

nla_put_failure:
        return true;
}

static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
                         const struct hash_ipportip4_elem *d)
{
        next->ip =
d->ip; next->port = d->port; } /* Common functions */ #define MTYPE hash_ipportip4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } port_to = port = ntohs(e.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? 
ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); if (i > IPSET_MAX_RANGE) { hash_ipportip4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } } return ret; } /* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 proto; u8 padding; }; /* Common functions */ static bool hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } static bool hash_ipportip6_data_list(struct sk_buff *skb, const struct hash_ipportip6_elem *data) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipportip6_data_next(struct hash_ipportip6_elem *next, const struct hash_ipportip6_elem *d) { next->port = d->port; } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ipportip6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ipportip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { e.port = htons(port); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } return ret; } static struct ip_set_type hash_ipportip_type __read_mostly = { .name = "hash:ip,port,ip", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ipportip_init(void) { return ip_set_type_register(&hash_ipportip_type); } static void __exit hash_ipportip_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ipportip_type); } module_init(hash_ipportip_init); module_exit(hash_ipportip_fini);
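/*
 * Editor's note -- illustrative sketch only, not part of the module.
 * For ADD requests, hash_ipportip4_uadt() above expands IPSET_ATTR_IP_TO /
 * IPSET_ATTR_CIDR and IPSET_ATTR_PORT_TO into individual (ip, port)
 * elements with a nested loop, and bails out with -ERANGE once the element
 * count passes IPSET_MAX_RANGE, remembering h->next so a retried request
 * can resume.  The userspace helper below restates just that expansion
 * pattern; the function name, the callback and the cap parameter are
 * assumptions made for the example, and -EEXIST stands in for the
 * ip_set_eexist() check in the real code.
 */
#include <errno.h>
#include <stdint.h>

static int sketch_expand_ip_port_range(uint32_t ip_from, uint32_t ip_to,
				       uint16_t port_from, uint16_t port_to,
				       uint64_t max_elems,
				       int (*add_elem)(uint32_t ip, uint16_t port))
{
	uint64_t ip, port, count = 0;	/* 64-bit counters avoid wraparound at the range ends */
	int ret = 0;

	for (ip = ip_from; ip <= ip_to; ip++) {		/* host byte order, as in the uadt handler */
		for (port = port_from; port <= port_to; port++, count++) {
			if (count > max_elems)
				return -ERANGE;		/* caller may retry from this point */
			ret = add_elem((uint32_t)ip, (uint16_t)port);
			if (ret && ret != -EEXIST)
				return ret;
			ret = 0;			/* an existing element is not an error */
		}
	}
	return ret;
}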
// SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 1999 - 2006 Intel Corporation. 
*/ /* e1000_hw.c * Shared functions for accessing and configuring the MAC */ #include <linux/bitfield.h> #include "e1000.h" static s32 e1000_check_downshift(struct e1000_hw *hw); static s32 e1000_check_polarity(struct e1000_hw *hw, e1000_rev_polarity *polarity); static void e1000_clear_hw_cntrs(struct e1000_hw *hw); static void e1000_clear_vfta(struct e1000_hw *hw); static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up); static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw); static s32 e1000_detect_gig_phy(struct e1000_hw *hw); static s32 e1000_get_auto_rd_done(struct e1000_hw *hw); static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, u16 *max_length); static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw); static s32 e1000_id_led_init(struct e1000_hw *hw); static void e1000_init_rx_addrs(struct e1000_hw *hw); static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); static s32 e1000_wait_autoneg(struct e1000_hw *hw); static void e1000_write_reg_io(struct e1000_hw *hw, u32 offset, u32 value); static s32 e1000_set_phy_type(struct e1000_hw *hw); static void e1000_phy_init_script(struct e1000_hw *hw); static s32 e1000_setup_copper_link(struct e1000_hw *hw); static s32 e1000_setup_fiber_serdes_link(struct e1000_hw *hw); static s32 e1000_adjust_serdes_amplitude(struct e1000_hw *hw); static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw); static s32 e1000_config_mac_to_phy(struct e1000_hw *hw); static void e1000_raise_mdi_clk(struct e1000_hw *hw, u32 *ctrl); static void e1000_lower_mdi_clk(struct e1000_hw *hw, u32 *ctrl); static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, u32 data, u16 count); static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw); static s32 e1000_phy_reset_dsp(struct e1000_hw *hw); static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw); static void e1000_raise_ee_clk(struct e1000_hw *hw, u32 *eecd); static void e1000_lower_ee_clk(struct e1000_hw *hw, u32 *eecd); static void e1000_shift_out_ee_bits(struct e1000_hw *hw, u16 data, u16 count); static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, u16 phy_data); static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data); static u16 e1000_shift_in_ee_bits(struct e1000_hw *hw, u16 count); static s32 e1000_acquire_eeprom(struct e1000_hw *hw); static void e1000_release_eeprom(struct e1000_hw *hw); static void e1000_standby_eeprom(struct e1000_hw *hw); static s32 e1000_set_vco_speed(struct e1000_hw *hw); static s32 e1000_polarity_reversal_workaround(struct e1000_hw *hw); static s32 e1000_set_phy_mode(struct e1000_hw *hw); static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); /* IGP cable length table */ static const u16 e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60, 60, 70, 70, 70, 70, 70, 
70, 80, 80, 80, 80, 80, 80, 90, 90, 90, 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120 }; static DEFINE_MUTEX(e1000_eeprom_lock); static DEFINE_SPINLOCK(e1000_phy_lock); /** * e1000_set_phy_type - Set the phy type member in the hw struct. * @hw: Struct containing variables accessed by shared code */ static s32 e1000_set_phy_type(struct e1000_hw *hw) { if (hw->mac_type == e1000_undefined) return -E1000_ERR_PHY_TYPE; switch (hw->phy_id) { case M88E1000_E_PHY_ID: case M88E1000_I_PHY_ID: case M88E1011_I_PHY_ID: case M88E1111_I_PHY_ID: case M88E1118_E_PHY_ID: hw->phy_type = e1000_phy_m88; break; case IGP01E1000_I_PHY_ID: if (hw->mac_type == e1000_82541 || hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) hw->phy_type = e1000_phy_igp; break; case RTL8211B_PHY_ID: hw->phy_type = e1000_phy_8211; break; case RTL8201N_PHY_ID: hw->phy_type = e1000_phy_8201; break; default: /* Should never have loaded on this device */ hw->phy_type = e1000_phy_undefined; return -E1000_ERR_PHY_TYPE; } return E1000_SUCCESS; } /** * e1000_phy_init_script - IGP phy init script - initializes the GbE PHY * @hw: Struct containing variables accessed by shared code */ static void e1000_phy_init_script(struct e1000_hw *hw) { u16 phy_saved_data; if (hw->phy_init_script) { msleep(20); /* Save off the current value of register 0x2F5B to be restored * at the end of this routine. */ e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); /* Disabled the PHY transmitter */ e1000_write_phy_reg(hw, 0x2F5B, 0x0003); msleep(20); e1000_write_phy_reg(hw, 0x0000, 0x0140); msleep(5); switch (hw->mac_type) { case e1000_82541: case e1000_82547: e1000_write_phy_reg(hw, 0x1F95, 0x0001); e1000_write_phy_reg(hw, 0x1F71, 0xBD21); e1000_write_phy_reg(hw, 0x1F79, 0x0018); e1000_write_phy_reg(hw, 0x1F30, 0x1600); e1000_write_phy_reg(hw, 0x1F31, 0x0014); e1000_write_phy_reg(hw, 0x1F32, 0x161C); e1000_write_phy_reg(hw, 0x1F94, 0x0003); e1000_write_phy_reg(hw, 0x1F96, 0x003F); e1000_write_phy_reg(hw, 0x2010, 0x0008); break; case e1000_82541_rev_2: case e1000_82547_rev_2: e1000_write_phy_reg(hw, 0x1F73, 0x0099); break; default: break; } e1000_write_phy_reg(hw, 0x0000, 0x3300); msleep(20); /* Now enable the transmitter */ e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); if (hw->mac_type == e1000_82547) { u16 fused, fine, coarse; /* Move to analog registers page */ e1000_read_phy_reg(hw, IGP01E1000_ANALOG_SPARE_FUSE_STATUS, &fused); if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) { e1000_read_phy_reg(hw, IGP01E1000_ANALOG_FUSE_STATUS, &fused); fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK; coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK; if (coarse > IGP01E1000_ANALOG_FUSE_COARSE_THRESH) { coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10; fine -= IGP01E1000_ANALOG_FUSE_FINE_1; } else if (coarse == IGP01E1000_ANALOG_FUSE_COARSE_THRESH) fine -= IGP01E1000_ANALOG_FUSE_FINE_10; fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) | (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) | (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK); e1000_write_phy_reg(hw, IGP01E1000_ANALOG_FUSE_CONTROL, fused); e1000_write_phy_reg(hw, IGP01E1000_ANALOG_FUSE_BYPASS, IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL); } } } } /** * e1000_set_mac_type - Set the mac type member in the hw struct. 
* @hw: Struct containing variables accessed by shared code */ s32 e1000_set_mac_type(struct e1000_hw *hw) { switch (hw->device_id) { case E1000_DEV_ID_82542: switch (hw->revision_id) { case E1000_82542_2_0_REV_ID: hw->mac_type = e1000_82542_rev2_0; break; case E1000_82542_2_1_REV_ID: hw->mac_type = e1000_82542_rev2_1; break; default: /* Invalid 82542 revision ID */ return -E1000_ERR_MAC_TYPE; } break; case E1000_DEV_ID_82543GC_FIBER: case E1000_DEV_ID_82543GC_COPPER: hw->mac_type = e1000_82543; break; case E1000_DEV_ID_82544EI_COPPER: case E1000_DEV_ID_82544EI_FIBER: case E1000_DEV_ID_82544GC_COPPER: case E1000_DEV_ID_82544GC_LOM: hw->mac_type = e1000_82544; break; case E1000_DEV_ID_82540EM: case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82540EP: case E1000_DEV_ID_82540EP_LOM: case E1000_DEV_ID_82540EP_LP: hw->mac_type = e1000_82540; break; case E1000_DEV_ID_82545EM_COPPER: case E1000_DEV_ID_82545EM_FIBER: hw->mac_type = e1000_82545; break; case E1000_DEV_ID_82545GM_COPPER: case E1000_DEV_ID_82545GM_FIBER: case E1000_DEV_ID_82545GM_SERDES: hw->mac_type = e1000_82545_rev_3; break; case E1000_DEV_ID_82546EB_COPPER: case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546EB_QUAD_COPPER: hw->mac_type = e1000_82546; break; case E1000_DEV_ID_82546GB_COPPER: case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82546GB_SERDES: case E1000_DEV_ID_82546GB_PCIE: case E1000_DEV_ID_82546GB_QUAD_COPPER: case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: hw->mac_type = e1000_82546_rev_3; break; case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EI_MOBILE: case E1000_DEV_ID_82541ER_LOM: hw->mac_type = e1000_82541; break; case E1000_DEV_ID_82541ER: case E1000_DEV_ID_82541GI: case E1000_DEV_ID_82541GI_LF: case E1000_DEV_ID_82541GI_MOBILE: hw->mac_type = e1000_82541_rev_2; break; case E1000_DEV_ID_82547EI: case E1000_DEV_ID_82547EI_MOBILE: hw->mac_type = e1000_82547; break; case E1000_DEV_ID_82547GI: hw->mac_type = e1000_82547_rev_2; break; case E1000_DEV_ID_INTEL_CE4100_GBE: hw->mac_type = e1000_ce4100; break; default: /* Should never have loaded on this device */ return -E1000_ERR_MAC_TYPE; } switch (hw->mac_type) { case e1000_82541: case e1000_82547: case e1000_82541_rev_2: case e1000_82547_rev_2: hw->asf_firmware_present = true; break; default: break; } /* The 82543 chip does not count tx_carrier_errors properly in * FD mode */ if (hw->mac_type == e1000_82543) hw->bad_tx_carr_stats_fd = true; if (hw->mac_type > e1000_82544) hw->has_smbus = true; return E1000_SUCCESS; } /** * e1000_set_media_type - Set media type and TBI compatibility. 
* @hw: Struct containing variables accessed by shared code */ void e1000_set_media_type(struct e1000_hw *hw) { u32 status; if (hw->mac_type != e1000_82543) { /* tbi_compatibility is only valid on 82543 */ hw->tbi_compatibility_en = false; } switch (hw->device_id) { case E1000_DEV_ID_82545GM_SERDES: case E1000_DEV_ID_82546GB_SERDES: hw->media_type = e1000_media_type_internal_serdes; break; default: switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: hw->media_type = e1000_media_type_fiber; break; case e1000_ce4100: hw->media_type = e1000_media_type_copper; break; default: status = er32(STATUS); if (status & E1000_STATUS_TBIMODE) { hw->media_type = e1000_media_type_fiber; /* tbi_compatibility not valid on fiber */ hw->tbi_compatibility_en = false; } else { hw->media_type = e1000_media_type_copper; } break; } } } /** * e1000_reset_hw - reset the hardware completely * @hw: Struct containing variables accessed by shared code * * Reset the transmit and receive units; mask and clear all interrupts. */ s32 e1000_reset_hw(struct e1000_hw *hw) { u32 ctrl; u32 ctrl_ext; u32 manc; u32 led_ctrl; s32 ret_val; /* For 82542 (rev 2.0), disable MWI before issuing a device reset */ if (hw->mac_type == e1000_82542_rev2_0) { e_dbg("Disabling MWI on 82542 rev 2.0\n"); e1000_pci_clear_mwi(hw); } /* Clear interrupt mask to stop board from generating interrupts */ e_dbg("Masking off all interrupts\n"); ew32(IMC, 0xffffffff); /* Disable the Transmit and Receive units. Then delay to allow * any pending transactions to complete before we hit the MAC with * the global reset. */ ew32(RCTL, 0); ew32(TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(); /* The tbi_compatibility_on Flag must be cleared when Rctl is cleared. */ hw->tbi_compatibility_on = false; /* Delay to allow any outstanding PCI transactions to complete before * resetting the device */ msleep(10); ctrl = er32(CTRL); /* Must reset the PHY before resetting the MAC */ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ew32(CTRL, (ctrl | E1000_CTRL_PHY_RST)); E1000_WRITE_FLUSH(); msleep(5); } /* Issue a global reset to the MAC. This will reset the chip's * transmit, receive, DMA, and link units. It will not effect * the current PCI configuration. The global reset bit is self- * clearing, and should clear within a microsecond. */ e_dbg("Issuing a global reset to MAC\n"); switch (hw->mac_type) { case e1000_82544: case e1000_82540: case e1000_82545: case e1000_82546: case e1000_82541: case e1000_82541_rev_2: /* These controllers can't ack the 64-bit write when issuing the * reset, so use IO-mapping as a workaround to issue the reset */ E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST)); break; case e1000_82545_rev_3: case e1000_82546_rev_3: /* Reset is performed on a shadow of the control register */ ew32(CTRL_DUP, (ctrl | E1000_CTRL_RST)); break; case e1000_ce4100: default: ew32(CTRL, (ctrl | E1000_CTRL_RST)); break; } /* After MAC reset, force reload of EEPROM to restore power-on settings * to device. Later controllers reload the EEPROM automatically, so * just wait for reload to complete. 
*/ switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: case e1000_82544: /* Wait for reset to complete */ udelay(10); ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_EE_RST; ew32(CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(); /* Wait for EEPROM reload */ msleep(2); break; case e1000_82541: case e1000_82541_rev_2: case e1000_82547: case e1000_82547_rev_2: /* Wait for EEPROM reload */ msleep(20); break; default: /* Auto read done will delay 5ms or poll based on mac type */ ret_val = e1000_get_auto_rd_done(hw); if (ret_val) return ret_val; break; } /* Disable HW ARPs on ASF enabled adapters */ if (hw->mac_type >= e1000_82540) { manc = er32(MANC); manc &= ~(E1000_MANC_ARP_EN); ew32(MANC, manc); } if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { e1000_phy_init_script(hw); /* Configure activity LED after PHY reset */ led_ctrl = er32(LEDCTL); led_ctrl &= IGP_ACTIVITY_LED_MASK; led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ew32(LEDCTL, led_ctrl); } /* Clear interrupt mask to stop board from generating interrupts */ e_dbg("Masking off all interrupts\n"); ew32(IMC, 0xffffffff); /* Clear any pending interrupt events. */ er32(ICR); /* If MWI was previously enabled, reenable it. */ if (hw->mac_type == e1000_82542_rev2_0) { if (hw->pci_cmd_word & PCI_COMMAND_INVALIDATE) e1000_pci_set_mwi(hw); } return E1000_SUCCESS; } /** * e1000_init_hw - Performs basic configuration of the adapter. * @hw: Struct containing variables accessed by shared code * * Assumes that the controller has previously been reset and is in a * post-reset uninitialized state. Initializes the receive address registers, * multicast table, and VLAN filter table. Calls routines to setup link * configuration and flow control settings. Clears all on-chip counters. Leaves * the transmit and receive units disabled and uninitialized. */ s32 e1000_init_hw(struct e1000_hw *hw) { u32 ctrl; u32 i; s32 ret_val; u32 mta_size; u32 ctrl_ext; /* Initialize Identification LED */ ret_val = e1000_id_led_init(hw); if (ret_val) { e_dbg("Error Initializing Identification LED\n"); return ret_val; } /* Set the media type and TBI compatibility */ e1000_set_media_type(hw); /* Disabling VLAN filtering. */ e_dbg("Initializing the IEEE VLAN\n"); if (hw->mac_type < e1000_82545_rev_3) ew32(VET, 0); e1000_clear_vfta(hw); /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ if (hw->mac_type == e1000_82542_rev2_0) { e_dbg("Disabling MWI on 82542 rev 2.0\n"); e1000_pci_clear_mwi(hw); ew32(RCTL, E1000_RCTL_RST); E1000_WRITE_FLUSH(); msleep(5); } /* Setup the receive address. This involves initializing all of the * Receive Address Registers (RARs 0 - 15). */ e1000_init_rx_addrs(hw); /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ if (hw->mac_type == e1000_82542_rev2_0) { ew32(RCTL, 0); E1000_WRITE_FLUSH(); msleep(1); if (hw->pci_cmd_word & PCI_COMMAND_INVALIDATE) e1000_pci_set_mwi(hw); } /* Zero out the Multicast HASH table */ e_dbg("Zeroing the MTA\n"); mta_size = E1000_MC_TBL_SIZE; for (i = 0; i < mta_size; i++) { E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); /* use write flush to prevent Memory Write Block (MWB) from * occurring when accessing our register space */ E1000_WRITE_FLUSH(); } /* Set the PCI priority bit correctly in the CTRL register. This * determines if the adapter gives priority to receives, or if it * gives equal priority to transmits and receives. Valid only on * 82542 and 82543 silicon. 
*/ if (hw->dma_fairness && hw->mac_type <= e1000_82543) { ctrl = er32(CTRL); ew32(CTRL, ctrl | E1000_CTRL_PRIOR); } switch (hw->mac_type) { case e1000_82545_rev_3: case e1000_82546_rev_3: break; default: /* Workaround for PCI-X problem when BIOS sets MMRBC * incorrectly. */ if (hw->bus_type == e1000_bus_type_pcix && e1000_pcix_get_mmrbc(hw) > 2048) e1000_pcix_set_mmrbc(hw, 2048); break; } /* Call a subroutine to configure the link and setup flow control. */ ret_val = e1000_setup_link(hw); /* Set the transmit descriptor write-back policy */ if (hw->mac_type > e1000_82544) { ctrl = er32(TXDCTL); ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; ew32(TXDCTL, ctrl); } /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs(hw); if (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER || hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3) { ctrl_ext = er32(CTRL_EXT); /* Relaxed ordering must be disabled to avoid a parity * error crash in a PCI slot. */ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ew32(CTRL_EXT, ctrl_ext); } return ret_val; } /** * e1000_adjust_serdes_amplitude - Adjust SERDES output amplitude based on EEPROM setting. * @hw: Struct containing variables accessed by shared code. */ static s32 e1000_adjust_serdes_amplitude(struct e1000_hw *hw) { u16 eeprom_data; s32 ret_val; if (hw->media_type != e1000_media_type_internal_serdes) return E1000_SUCCESS; switch (hw->mac_type) { case e1000_82545_rev_3: case e1000_82546_rev_3: break; default: return E1000_SUCCESS; } ret_val = e1000_read_eeprom(hw, EEPROM_SERDES_AMPLITUDE, 1, &eeprom_data); if (ret_val) return ret_val; if (eeprom_data != EEPROM_RESERVED_WORD) { /* Adjust SERDES output amplitude only. */ eeprom_data &= EEPROM_SERDES_AMPLITUDE_MASK; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_EXT_CTRL, eeprom_data); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_setup_link - Configures flow control and link settings. * @hw: Struct containing variables accessed by shared code * * Determines which flow control settings to use. Calls the appropriate media- * specific link configuration function. Configures the flow control settings. * Assuming the adapter has a valid link partner, a valid link should be * established. Assumes the hardware has previously been reset and the * transmitter and receiver are not enabled. */ s32 e1000_setup_link(struct e1000_hw *hw) { u32 ctrl_ext; s32 ret_val; u16 eeprom_data; /* Read and store word 0x0F of the EEPROM. This word contains bits * that determine the hardware's default PAUSE (flow control) mode, * a bit that determines whether the HW defaults to enabling or * disabling auto-negotiation, and the direction of the * SW defined pins. If there is no SW over-ride of the flow * control setting, then the variable hw->fc will * be initialized based on a value in the EEPROM. 
*/ if (hw->fc == E1000_FC_DEFAULT) { ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data); if (ret_val) { e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0) hw->fc = E1000_FC_NONE; else if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == EEPROM_WORD0F_ASM_DIR) hw->fc = E1000_FC_TX_PAUSE; else hw->fc = E1000_FC_FULL; } /* We want to save off the original Flow Control configuration just * in case we get disconnected and then reconnected into a different * hub or switch with different Flow Control capabilities. */ if (hw->mac_type == e1000_82542_rev2_0) hw->fc &= (~E1000_FC_TX_PAUSE); if ((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1)) hw->fc &= (~E1000_FC_RX_PAUSE); hw->original_fc = hw->fc; e_dbg("After fix-ups FlowControl is now = %x\n", hw->fc); /* Take the 4 bits from EEPROM word 0x0F that determine the initial * polarity value for the SW controlled pins, and setup the * Extended Device Control reg with that info. * This is needed because one of the SW controlled pins is used for * signal detection. So this should be done before e1000_setup_pcs_link() * or e1000_phy_setup() is called. */ if (hw->mac_type == e1000_82543) { ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data); if (ret_val) { e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) << SWDPIO__EXT_SHIFT); ew32(CTRL_EXT, ctrl_ext); } /* Call the necessary subroutine to configure the link. */ ret_val = (hw->media_type == e1000_media_type_copper) ? e1000_setup_copper_link(hw) : e1000_setup_fiber_serdes_link(hw); /* Initialize the flow control address, type, and PAUSE timer * registers to their default values. This is done even if flow * control is disabled, because it does not hurt anything to * initialize these registers. */ e_dbg("Initializing the Flow Control address, type and timer regs\n"); ew32(FCT, FLOW_CONTROL_TYPE); ew32(FCAH, FLOW_CONTROL_ADDRESS_HIGH); ew32(FCAL, FLOW_CONTROL_ADDRESS_LOW); ew32(FCTTV, hw->fc_pause_time); /* Set the flow control receive threshold registers. Normally, * these registers will be set to a default threshold that may be * adjusted later by the driver's runtime code. However, if the * ability to transmit pause frames in not enabled, then these * registers will be set to 0. */ if (!(hw->fc & E1000_FC_TX_PAUSE)) { ew32(FCRTL, 0); ew32(FCRTH, 0); } else { /* We need to set up the Receive Threshold high and low water * marks as well as (optionally) enabling the transmission of * XON frames. */ if (hw->fc_send_xon) { ew32(FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE)); ew32(FCRTH, hw->fc_high_water); } else { ew32(FCRTL, hw->fc_low_water); ew32(FCRTH, hw->fc_high_water); } } return ret_val; } /** * e1000_setup_fiber_serdes_link - prepare fiber or serdes link * @hw: Struct containing variables accessed by shared code * * Manipulates Physical Coding Sublayer functions in order to configure * link. Assumes the hardware has been previously reset and the transmitter * and receiver are not enabled. */ static s32 e1000_setup_fiber_serdes_link(struct e1000_hw *hw) { u32 ctrl; u32 status; u32 txcw = 0; u32 i; u32 signal = 0; s32 ret_val; /* On adapters with a MAC newer than 82544, SWDP 1 will be * set when the optics detect a signal. On older adapters, it will be * cleared when there is a signal. This applies to fiber media only. * If we're on serdes media, adjust the output amplitude to value * set in the EEPROM. 
*/ ctrl = er32(CTRL); if (hw->media_type == e1000_media_type_fiber) signal = (hw->mac_type > e1000_82544) ? E1000_CTRL_SWDPIN1 : 0; ret_val = e1000_adjust_serdes_amplitude(hw); if (ret_val) return ret_val; /* Take the link out of reset */ ctrl &= ~(E1000_CTRL_LRST); /* Adjust VCO speed to improve BER performance */ ret_val = e1000_set_vco_speed(hw); if (ret_val) return ret_val; e1000_config_collision_dist(hw); /* Check for a software override of the flow control settings, and setup * the device accordingly. If auto-negotiation is enabled, then * software will have to set the "PAUSE" bits to the correct value in * the Tranmsit Config Word Register (TXCW) and re-start * auto-negotiation. However, if auto-negotiation is disabled, then * software will have to manually configure the two flow control enable * bits in the CTRL register. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause frames, but * not send pause frames). * 2: Tx flow control is enabled (we can send pause frames but we do * not support receiving pause frames). * 3: Both Rx and TX flow control (symmetric) are enabled. */ switch (hw->fc) { case E1000_FC_NONE: /* Flow ctrl is completely disabled by a software over-ride */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); break; case E1000_FC_RX_PAUSE: /* Rx Flow control is enabled and Tx Flow control is disabled by * a software over-ride. Since there really isn't a way to * advertise that we are capable of Rx Pause ONLY, we will * advertise that we support both symmetric and asymmetric Rx * PAUSE. Later, we will disable the adapter's ability to send * PAUSE frames. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); break; case E1000_FC_TX_PAUSE: /* Tx Flow control is enabled, and Rx Flow control is disabled, * by a software over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); break; case E1000_FC_FULL: /* Flow control (both Rx and Tx) is enabled by a software * over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); break; default: e_dbg("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; } /* Since auto-negotiation is enabled, take the link out of reset (the * link will be in reset, because we previously reset the chip). This * will restart auto-negotiation. If auto-negotiation is successful * then the link-up status bit will be set and the flow control enable * bits (RFCE and TFCE) will be set according to their negotiated value. */ e_dbg("Auto-negotiation enabled\n"); ew32(TXCW, txcw); ew32(CTRL, ctrl); E1000_WRITE_FLUSH(); hw->txcw = txcw; msleep(1); /* If we have a signal (the cable is plugged in) then poll for a * "Link-Up" indication in the Device Status Register. Time-out if a * link isn't seen in 500 milliseconds seconds (Auto-negotiation should * complete in less than 500 milliseconds even if the other end is doing * it in SW). For internal serdes, we just assume a signal is present, * then poll. */ if (hw->media_type == e1000_media_type_internal_serdes || (er32(CTRL) & E1000_CTRL_SWDPIN1) == signal) { e_dbg("Looking for Link\n"); for (i = 0; i < (LINK_UP_TIMEOUT / 10); i++) { msleep(10); status = er32(STATUS); if (status & E1000_STATUS_LU) break; } if (i == (LINK_UP_TIMEOUT / 10)) { e_dbg("Never got a valid link from auto-neg!!!\n"); hw->autoneg_failed = 1; /* AutoNeg failed to achieve a link, so we'll call * e1000_check_for_link. This routine will force the * link up if we detect a signal. 
This will allow us to * communicate with non-autonegotiating link partners. */ ret_val = e1000_check_for_link(hw); if (ret_val) { e_dbg("Error while checking for link\n"); return ret_val; } hw->autoneg_failed = 0; } else { hw->autoneg_failed = 0; e_dbg("Valid Link Found\n"); } } else { e_dbg("No Signal Detected\n"); } return E1000_SUCCESS; } /** * e1000_copper_link_rtl_setup - Copper link setup for e1000_phy_rtl series. * @hw: Struct containing variables accessed by shared code * * Commits changes to PHY configuration by calling e1000_phy_reset(). */ static s32 e1000_copper_link_rtl_setup(struct e1000_hw *hw) { s32 ret_val; /* SW reset the PHY so all changes take effect */ ret_val = e1000_phy_reset(hw); if (ret_val) { e_dbg("Error Resetting the PHY\n"); return ret_val; } return E1000_SUCCESS; } static s32 gbe_dhg_phy_setup(struct e1000_hw *hw) { s32 ret_val; u32 ctrl_aux; switch (hw->phy_type) { case e1000_phy_8211: ret_val = e1000_copper_link_rtl_setup(hw); if (ret_val) { e_dbg("e1000_copper_link_rtl_setup failed!\n"); return ret_val; } break; case e1000_phy_8201: /* Set RMII mode */ ctrl_aux = er32(CTL_AUX); ctrl_aux |= E1000_CTL_AUX_RMII; ew32(CTL_AUX, ctrl_aux); E1000_WRITE_FLUSH(); /* Disable the J/K bits required for receive */ ctrl_aux = er32(CTL_AUX); ctrl_aux |= 0x4; ctrl_aux &= ~0x2; ew32(CTL_AUX, ctrl_aux); E1000_WRITE_FLUSH(); ret_val = e1000_copper_link_rtl_setup(hw); if (ret_val) { e_dbg("e1000_copper_link_rtl_setup failed!\n"); return ret_val; } break; default: e_dbg("Error Resetting the PHY\n"); return E1000_ERR_PHY_TYPE; } return E1000_SUCCESS; } /** * e1000_copper_link_preconfig - early configuration for copper * @hw: Struct containing variables accessed by shared code * * Make sure we have a valid PHY and change PHY mode before link setup. */ static s32 e1000_copper_link_preconfig(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 phy_data; ctrl = er32(CTRL); /* With 82543, we need to force speed and duplex on the MAC equal to * what the PHY speed and duplex configuration is. In addition, we need * to perform a hardware reset on the PHY to take it out of reset. */ if (hw->mac_type > e1000_82543) { ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ew32(CTRL, ctrl); } else { ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU); ew32(CTRL, ctrl); ret_val = e1000_phy_hw_reset(hw); if (ret_val) return ret_val; } /* Make sure we have a valid PHY */ ret_val = e1000_detect_gig_phy(hw); if (ret_val) { e_dbg("Error, did not detect valid phy.\n"); return ret_val; } e_dbg("Phy ID = %x\n", hw->phy_id); /* Set PHY to class A mode (if necessary) */ ret_val = e1000_set_phy_mode(hw); if (ret_val) return ret_val; if ((hw->mac_type == e1000_82545_rev_3) || (hw->mac_type == e1000_82546_rev_3)) { ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); phy_data |= 0x00000008; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); } if (hw->mac_type <= e1000_82543 || hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547 || hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) hw->phy_reset_disable = false; return E1000_SUCCESS; } /** * e1000_copper_link_igp_setup - Copper link setup for e1000_phy_igp series. 
* @hw: Struct containing variables accessed by shared code */ static s32 e1000_copper_link_igp_setup(struct e1000_hw *hw) { u32 led_ctrl; s32 ret_val; u16 phy_data; if (hw->phy_reset_disable) return E1000_SUCCESS; ret_val = e1000_phy_reset(hw); if (ret_val) { e_dbg("Error Resetting the PHY\n"); return ret_val; } /* Wait 15ms for MAC to configure PHY from eeprom settings */ msleep(15); /* Configure activity LED after PHY reset */ led_ctrl = er32(LEDCTL); led_ctrl &= IGP_ACTIVITY_LED_MASK; led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ew32(LEDCTL, led_ctrl); /* The NVM settings will configure LPLU in D3 for IGP2 and IGP3 PHYs */ if (hw->phy_type == e1000_phy_igp) { /* disable lplu d3 during driver init */ ret_val = e1000_set_d3_lplu_state(hw, false); if (ret_val) { e_dbg("Error Disabling LPLU D3\n"); return ret_val; } } /* Configure mdi-mdix settings */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); if (ret_val) return ret_val; if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { hw->dsp_config_state = e1000_dsp_config_disabled; /* Force MDI for earlier revs of the IGP PHY */ phy_data &= ~(IGP01E1000_PSCR_AUTO_MDIX | IGP01E1000_PSCR_FORCE_MDI_MDIX); hw->mdix = 1; } else { hw->dsp_config_state = e1000_dsp_config_enabled; phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; switch (hw->mdix) { case 1: phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; break; case 2: phy_data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; break; case 0: default: phy_data |= IGP01E1000_PSCR_AUTO_MDIX; break; } } ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); if (ret_val) return ret_val; /* set auto-master slave resolution settings */ if (hw->autoneg) { e1000_ms_type phy_ms_setting = hw->master_slave; if (hw->ffe_config_state == e1000_ffe_config_active) hw->ffe_config_state = e1000_ffe_config_enabled; if (hw->dsp_config_state == e1000_dsp_config_activated) hw->dsp_config_state = e1000_dsp_config_enabled; /* when autonegotiation advertisement is only 1000Mbps then we * should disable SmartSpeed and enable Auto MasterSlave * resolution as hardware default. */ if (hw->autoneg_advertised == ADVERTISE_1000_FULL) { /* Disable SmartSpeed */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); if (ret_val) return ret_val; /* Set auto Master/Slave resolution process */ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data); if (ret_val) return ret_val; phy_data &= ~CR_1000T_MS_ENABLE; ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data); if (ret_val) return ret_val; } ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data); if (ret_val) return ret_val; /* load defaults for future use */ hw->original_master_slave = (phy_data & CR_1000T_MS_ENABLE) ? ((phy_data & CR_1000T_MS_VALUE) ? e1000_ms_force_master : e1000_ms_force_slave) : e1000_ms_auto; switch (phy_ms_setting) { case e1000_ms_force_master: phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); break; case e1000_ms_force_slave: phy_data |= CR_1000T_MS_ENABLE; phy_data &= ~(CR_1000T_MS_VALUE); break; case e1000_ms_auto: phy_data &= ~CR_1000T_MS_ENABLE; break; default: break; } ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_copper_link_mgp_setup - Copper link setup for e1000_phy_m88 series. 
* @hw: Struct containing variables accessed by shared code */ static s32 e1000_copper_link_mgp_setup(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; if (hw->phy_reset_disable) return E1000_SUCCESS; /* Enable CRS on TX. This must be set for half-duplex operation. */ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode * 2 - MDI-X mode * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) */ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; switch (hw->mdix) { case 1: phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; break; case 2: phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; break; case 3: phy_data |= M88E1000_PSCR_AUTO_X_1000T; break; case 0: default: phy_data |= M88E1000_PSCR_AUTO_X_MODE; break; } /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled * 1 - Enabled */ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; if (hw->disable_polarity_correction == 1) phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; if (hw->phy_revision < M88E1011_I_REV_4) { /* Force TX_CLK in the Extended PHY Specific Control Register * to 25MHz clock. */ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_EPSCR_TX_CLK_25; if ((hw->phy_revision == E1000_REVISION_2) && (hw->phy_id == M88E1111_I_PHY_ID)) { /* Vidalia Phy, set the downshift counter to 5x */ phy_data &= ~(M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK); phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; } else { /* Configure Master and Slave downshift values */ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; } } /* SW Reset the PHY so all changes take effect */ ret_val = e1000_phy_reset(hw); if (ret_val) { e_dbg("Error Resetting the PHY\n"); return ret_val; } return E1000_SUCCESS; } /** * e1000_copper_link_autoneg - setup auto-neg * @hw: Struct containing variables accessed by shared code * * Setup auto-negotiation and flow control advertisements, * and then perform auto-negotiation. */ static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; /* Perform some bounds checking on the hw->autoneg_advertised * parameter. If this variable is zero, then set it to the default. */ hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT; /* If autoneg_advertised is zero, we assume it was not defaulted * by the calling code so we set to advertise full capability. */ if (hw->autoneg_advertised == 0) hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT; /* IFE/RTL8201N PHY only supports 10/100 */ if (hw->phy_type == e1000_phy_8201) hw->autoneg_advertised &= AUTONEG_ADVERTISE_10_100_ALL; e_dbg("Reconfiguring auto-neg advertisement params\n"); ret_val = e1000_phy_setup_autoneg(hw); if (ret_val) { e_dbg("Error Setting up Auto-Negotiation\n"); return ret_val; } e_dbg("Restarting Auto-Neg\n"); /* Restart auto-negotiation by setting the Auto Neg Enable bit and * the Auto Neg Restart bit in the PHY control register. 
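 *
 * Per the standard MII definition (general background, not specific to
 * this driver), the restart bit is self-clearing and only takes effect
 * while auto-negotiation is enabled, which is why both bits are OR-ed in
 * together below:
 *
 *   phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);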
*/ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); if (ret_val) return ret_val; /* Does the user want to wait for Auto-Neg to complete here, or * check at a later time (for example, callback routine). */ if (hw->wait_autoneg_complete) { ret_val = e1000_wait_autoneg(hw); if (ret_val) { e_dbg ("Error while waiting for autoneg to complete\n"); return ret_val; } } hw->get_link_status = true; return E1000_SUCCESS; } /** * e1000_copper_link_postconfig - post link setup * @hw: Struct containing variables accessed by shared code * * Config the MAC and the PHY after link is up. * 1) Set up the MAC to the current PHY speed/duplex * if we are on 82543. If we * are on newer silicon, we only need to configure * collision distance in the Transmit Control Register. * 2) Set up flow control on the MAC to that established with * the link partner. * 3) Config DSP to improve Gigabit link quality for some PHY revisions. */ static s32 e1000_copper_link_postconfig(struct e1000_hw *hw) { s32 ret_val; if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100)) { e1000_config_collision_dist(hw); } else { ret_val = e1000_config_mac_to_phy(hw); if (ret_val) { e_dbg("Error configuring MAC to PHY settings\n"); return ret_val; } } ret_val = e1000_config_fc_after_link_up(hw); if (ret_val) { e_dbg("Error Configuring Flow Control\n"); return ret_val; } /* Config DSP to improve Giga link quality */ if (hw->phy_type == e1000_phy_igp) { ret_val = e1000_config_dsp_after_link_change(hw, true); if (ret_val) { e_dbg("Error Configuring DSP after link up\n"); return ret_val; } } return E1000_SUCCESS; } /** * e1000_setup_copper_link - phy/speed/duplex setting * @hw: Struct containing variables accessed by shared code * * Detects which PHY is present and sets up the speed and duplex */ static s32 e1000_setup_copper_link(struct e1000_hw *hw) { s32 ret_val; u16 i; u16 phy_data; /* Check if it is a valid PHY and set PHY mode if necessary. */ ret_val = e1000_copper_link_preconfig(hw); if (ret_val) return ret_val; if (hw->phy_type == e1000_phy_igp) { ret_val = e1000_copper_link_igp_setup(hw); if (ret_val) return ret_val; } else if (hw->phy_type == e1000_phy_m88) { ret_val = e1000_copper_link_mgp_setup(hw); if (ret_val) return ret_val; } else { ret_val = gbe_dhg_phy_setup(hw); if (ret_val) { e_dbg("gbe_dhg_phy_setup failed!\n"); return ret_val; } } if (hw->autoneg) { /* Setup autoneg and flow control advertisement * and perform autonegotiation */ ret_val = e1000_copper_link_autoneg(hw); if (ret_val) return ret_val; } else { /* PHY will be set to 10H, 10F, 100H,or 100F * depending on value from forced_speed_duplex. */ e_dbg("Forcing speed and duplex\n"); ret_val = e1000_phy_force_speed_duplex(hw); if (ret_val) { e_dbg("Error Forcing Speed and Duplex\n"); return ret_val; } } /* Check link status. Wait up to 100 microseconds for link to become * valid. 
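 *
 * Note: each pass of the loop below reads PHY_STATUS twice because, as
 * other comments in this file point out, the link bit is latched
 * ("sticky") and only the second read reflects the current state. With
 * ten iterations of udelay(10) the total wait is on the order of the
 * 100 microseconds mentioned above.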
*/ for (i = 0; i < 10; i++) { ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; if (phy_data & MII_SR_LINK_STATUS) { /* Config the MAC and PHY after link is up */ ret_val = e1000_copper_link_postconfig(hw); if (ret_val) return ret_val; e_dbg("Valid link established!!!\n"); return E1000_SUCCESS; } udelay(10); } e_dbg("Unable to establish link!!!\n"); return E1000_SUCCESS; } /** * e1000_phy_setup_autoneg - phy settings * @hw: Struct containing variables accessed by shared code * * Configures PHY autoneg and flow control advertisement settings */ s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) { s32 ret_val; u16 mii_autoneg_adv_reg; u16 mii_1000t_ctrl_reg; /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); if (ret_val) return ret_val; /* Read the MII 1000Base-T Control Register (Address 9). */ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg); if (ret_val) return ret_val; else if (hw->phy_type == e1000_phy_8201) mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK; /* Need to parse both autoneg_advertised and fc and set up * the appropriate PHY registers. First we will parse for * autoneg_advertised software override. Since we can advertise * a plethora of combinations, we need to check each bit * individually. */ /* First we clear all the 10/100 mb speed bits in the Auto-Neg * Advertisement Register (Address 4) and the 1000 mb speed bits in * the 1000Base-T Control Register (Address 9). */ mii_autoneg_adv_reg &= ~REG4_SPEED_MASK; mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK; e_dbg("autoneg_advertised %x\n", hw->autoneg_advertised); /* Do we want to advertise 10 Mb Half Duplex? */ if (hw->autoneg_advertised & ADVERTISE_10_HALF) { e_dbg("Advertise 10mb Half duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; } /* Do we want to advertise 10 Mb Full Duplex? */ if (hw->autoneg_advertised & ADVERTISE_10_FULL) { e_dbg("Advertise 10mb Full duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; } /* Do we want to advertise 100 Mb Half Duplex? */ if (hw->autoneg_advertised & ADVERTISE_100_HALF) { e_dbg("Advertise 100mb Half duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; } /* Do we want to advertise 100 Mb Full Duplex? */ if (hw->autoneg_advertised & ADVERTISE_100_FULL) { e_dbg("Advertise 100mb Full duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; } /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ if (hw->autoneg_advertised & ADVERTISE_1000_HALF) { e_dbg ("Advertise 1000mb Half duplex requested, request denied!\n"); } /* Do we want to advertise 1000 Mb Full Duplex? */ if (hw->autoneg_advertised & ADVERTISE_1000_FULL) { e_dbg("Advertise 1000mb Full duplex\n"); mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; } /* Check for a software override of the flow control settings, and * setup the PHY advertisement registers accordingly. If * auto-negotiation is enabled, then software will have to set the * "PAUSE" bits to the correct value in the Auto-Negotiation * Advertisement Register (PHY_AUTONEG_ADV) and re-start * auto-negotiation. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause frames * but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames * but we do not support receiving pause frames). 
* 3: Both Rx and TX flow control (symmetric) are enabled. * other: No software override. The flow control configuration * in the EEPROM is used. */ switch (hw->fc) { case E1000_FC_NONE: /* 0 */ /* Flow control (RX & TX) is completely disabled by a * software over-ride. */ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; case E1000_FC_RX_PAUSE: /* 1 */ /* RX Flow control is enabled, and TX Flow control is * disabled, by a software over-ride. */ /* Since there really isn't a way to advertise that we are * capable of RX Pause ONLY, we will advertise that we * support both symmetric and asymmetric RX PAUSE. Later * (in e1000_config_fc_after_link_up) we will disable the * hw's ability to send PAUSE frames. */ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; case E1000_FC_TX_PAUSE: /* 2 */ /* TX Flow control is enabled, and RX Flow control is * disabled, by a software over-ride. */ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; break; case E1000_FC_FULL: /* 3 */ /* Flow control (both RX and TX) is enabled by a software * over-ride. */ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; default: e_dbg("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; } ret_val = e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); if (ret_val) return ret_val; e_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); if (hw->phy_type == e1000_phy_8201) { mii_1000t_ctrl_reg = 0; } else { ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_phy_force_speed_duplex - force link settings * @hw: Struct containing variables accessed by shared code * * Force PHY speed and duplex settings to hw->forced_speed_duplex */ static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 mii_ctrl_reg; u16 mii_status_reg; u16 phy_data; u16 i; /* Turn off Flow control if we are forcing speed and duplex. */ hw->fc = E1000_FC_NONE; e_dbg("hw->fc = %d\n", hw->fc); /* Read the Device Control Register. */ ctrl = er32(CTRL); /* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ctrl &= ~(DEVICE_SPEED_MASK); /* Clear the Auto Speed Detect Enable bit. */ ctrl &= ~E1000_CTRL_ASDE; /* Read the MII Control Register. */ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg); if (ret_val) return ret_val; /* We need to disable autoneg in order to force link and duplex. */ mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN; /* Are we forcing Full or Half Duplex? */ if (hw->forced_speed_duplex == e1000_100_full || hw->forced_speed_duplex == e1000_10_full) { /* We want to force full duplex so we SET the full duplex bits * in the Device and MII Control Registers. */ ctrl |= E1000_CTRL_FD; mii_ctrl_reg |= MII_CR_FULL_DUPLEX; e_dbg("Full Duplex\n"); } else { /* We want to force half duplex so we CLEAR the full duplex bits * in the Device and MII Control Registers. */ ctrl &= ~E1000_CTRL_FD; mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX; e_dbg("Half Duplex\n"); } /* Are we forcing 100Mbps??? */ if (hw->forced_speed_duplex == e1000_100_full || hw->forced_speed_duplex == e1000_100_half) { /* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */ ctrl |= E1000_CTRL_SPD_100; mii_ctrl_reg |= MII_CR_SPEED_100; mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); e_dbg("Forcing 100mb "); } else { /* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. 
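 *
 * (For context: this else-branch covers the forced 10 Mb/s settings,
 * e1000_10_full and e1000_10_half; the 100 Mb/s cases were handled just
 * above. An illustrative, hypothetical caller that ends up here would
 * have configured something like
 *
 *   hw->autoneg = 0;
 *   hw->forced_speed_duplex = e1000_10_full;
 *
 * before link setup, so that e1000_setup_copper_link() takes the
 * "Forcing speed and duplex" path into this routine.)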
*/ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); mii_ctrl_reg |= MII_CR_SPEED_10; mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); e_dbg("Forcing 10mb "); } e1000_config_collision_dist(hw); /* Write the configured values back to the Device Control Reg. */ ew32(CTRL, ctrl); if (hw->phy_type == e1000_phy_m88) { ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* Clear Auto-Crossover to force MDI manually. M88E1000 requires * MDI forced whenever speed are duplex are forced. */ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; e_dbg("M88E1000 PSCR: %x\n", phy_data); /* Need to reset the PHY or these changes will be ignored */ mii_ctrl_reg |= MII_CR_RESET; /* Disable MDI-X support for 10/100 */ } else { /* Clear Auto-Crossover to force MDI manually. IGP requires MDI * forced whenever speed or duplex are forced. */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); if (ret_val) return ret_val; } /* Write back the modified PHY MII control register. */ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg); if (ret_val) return ret_val; udelay(1); /* The wait_autoneg_complete flag may be a little misleading here. * Since we are forcing speed and duplex, Auto-Neg is not enabled. * But we do want to delay for a period while forcing only so we * don't generate false No Link messages. So we will wait here * only if the user has set wait_autoneg_complete to 1, which is * the default. */ if (hw->wait_autoneg_complete) { /* We will wait for autoneg to complete. */ e_dbg("Waiting for forced speed/duplex link.\n"); mii_status_reg = 0; /* Wait for autoneg to complete or 4.5 seconds to expire */ for (i = PHY_FORCE_TIME; i > 0; i--) { /* Read the MII Status Register and wait for Auto-Neg * Complete bit to be set. */ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; if (mii_status_reg & MII_SR_LINK_STATUS) break; msleep(100); } if ((i == 0) && (hw->phy_type == e1000_phy_m88)) { /* We didn't get link. Reset the DSP and wait again * for link. */ ret_val = e1000_phy_reset_dsp(hw); if (ret_val) { e_dbg("Error Resetting PHY DSP\n"); return ret_val; } } /* This loop will early-out if the link condition has been * met */ for (i = PHY_FORCE_TIME; i > 0; i--) { if (mii_status_reg & MII_SR_LINK_STATUS) break; msleep(100); /* Read the MII Status Register and wait for Auto-Neg * Complete bit to be set. */ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; } } if (hw->phy_type == e1000_phy_m88) { /* Because we reset the PHY above, we need to re-force TX_CLK in * the Extended PHY Specific Control Register to 25MHz clock. * This value defaults back to a 2.5MHz clock when the PHY is * reset. */ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_EPSCR_TX_CLK_25; ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; /* In addition, because of the s/w reset above, we need to * enable CRS on Tx. 
This must be set for both full and half * duplex operation. */ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) && (!hw->autoneg) && (hw->forced_speed_duplex == e1000_10_full || hw->forced_speed_duplex == e1000_10_half)) { ret_val = e1000_polarity_reversal_workaround(hw); if (ret_val) return ret_val; } } return E1000_SUCCESS; } /** * e1000_config_collision_dist - set collision distance register * @hw: Struct containing variables accessed by shared code * * Sets the collision distance in the Transmit Control register. * Link should have been established previously. Reads the speed and duplex * information from the Device Status register. */ void e1000_config_collision_dist(struct e1000_hw *hw) { u32 tctl, coll_dist; if (hw->mac_type < e1000_82543) coll_dist = E1000_COLLISION_DISTANCE_82542; else coll_dist = E1000_COLLISION_DISTANCE; tctl = er32(TCTL); tctl &= ~E1000_TCTL_COLD; tctl |= coll_dist << E1000_COLD_SHIFT; ew32(TCTL, tctl); E1000_WRITE_FLUSH(); } /** * e1000_config_mac_to_phy - sync phy and mac settings * @hw: Struct containing variables accessed by shared code * * Sets MAC speed and duplex settings to reflect the those in the PHY * The contents of the PHY register containing the needed information need to * be passed in. */ static s32 e1000_config_mac_to_phy(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 phy_data; /* 82544 or newer MAC, Auto Speed Detection takes care of * MAC speed/duplex configuration. */ if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100)) return E1000_SUCCESS; /* Read the Device Control Register and set the bits to Force Speed * and Duplex. */ ctrl = er32(CTRL); ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); switch (hw->phy_type) { case e1000_phy_8201: ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); if (ret_val) return ret_val; if (phy_data & RTL_PHY_CTRL_FD) ctrl |= E1000_CTRL_FD; else ctrl &= ~E1000_CTRL_FD; if (phy_data & RTL_PHY_CTRL_SPD_100) ctrl |= E1000_CTRL_SPD_100; else ctrl |= E1000_CTRL_SPD_10; e1000_config_collision_dist(hw); break; default: /* Set up duplex in the Device Control and Transmit Control * registers depending on negotiated values. */ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; if (phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD; else ctrl &= ~E1000_CTRL_FD; e1000_config_collision_dist(hw); /* Set up speed in the Device Control register depending on * negotiated values. */ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ctrl |= E1000_CTRL_SPD_1000; else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ctrl |= E1000_CTRL_SPD_100; } /* Write the configured values back to the Device Control Reg. */ ew32(CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_force_mac_fc - force flow control settings * @hw: Struct containing variables accessed by shared code * * Forces the MAC's flow control settings. * Sets the TFCE and RFCE bits in the device control register to reflect * the adapter settings. TFCE and RFCE need to be explicitly set by * software when a Copper PHY is used because autonegotiation is managed * by the PHY rather than the MAC. Software must also configure these * bits when link is forced on a fiber connection. 
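 *
 * Example (illustrative only; e1000_config_fc_after_link_up() below calls
 * it the same way once hw->fc holds the desired E1000_FC_* mode):
 *
 *      hw->fc = E1000_FC_FULL;
 *      ret_val = e1000_force_mac_fc(hw);
 *      if (ret_val)
 *              e_dbg("Error forcing flow control settings\n");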
*/ s32 e1000_force_mac_fc(struct e1000_hw *hw) { u32 ctrl; /* Get the current configuration of the Device Control Register */ ctrl = er32(CTRL); /* Because we didn't get link via the internal auto-negotiation * mechanism (we either forced link or we got link via PHY * auto-neg), we have to manually enable/disable transmit an * receive flow control. * * The "Case" statement below enables/disable flow control * according to the "hw->fc" parameter. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause * frames but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames * but we do not receive pause frames). * 3: Both Rx and TX flow control (symmetric) is enabled. * other: No other values should be possible at this point. */ switch (hw->fc) { case E1000_FC_NONE: ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); break; case E1000_FC_RX_PAUSE: ctrl &= (~E1000_CTRL_TFCE); ctrl |= E1000_CTRL_RFCE; break; case E1000_FC_TX_PAUSE: ctrl &= (~E1000_CTRL_RFCE); ctrl |= E1000_CTRL_TFCE; break; case E1000_FC_FULL: ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); break; default: e_dbg("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; } /* Disable TX Flow Control for 82542 (rev 2.0) */ if (hw->mac_type == e1000_82542_rev2_0) ctrl &= (~E1000_CTRL_TFCE); ew32(CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_config_fc_after_link_up - configure flow control after autoneg * @hw: Struct containing variables accessed by shared code * * Configures flow control settings after link is established * Should be called immediately after a valid link has been established. * Forces MAC flow control settings if link was forced. When in MII/GMII mode * and autonegotiation is enabled, the MAC flow control settings will be set * based on the flow control negotiated by the PHY. In TBI mode, the TFCE * and RFCE bits will be automatically set to the negotiated flow control mode. */ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw) { s32 ret_val; u16 mii_status_reg; u16 mii_nway_adv_reg; u16 mii_nway_lp_ability_reg; u16 speed; u16 duplex; /* Check for the case where we have fiber media and auto-neg failed * so we had to force link. In this case, we need to force the * configuration of the MAC to match the "fc" parameter. */ if (((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) || ((hw->media_type == e1000_media_type_internal_serdes) && (hw->autoneg_failed)) || ((hw->media_type == e1000_media_type_copper) && (!hw->autoneg))) { ret_val = e1000_force_mac_fc(hw); if (ret_val) { e_dbg("Error forcing flow control settings\n"); return ret_val; } } /* Check for the case where we have copper media and auto-neg is * enabled. In this case, we need to check and see if Auto-Neg * has completed, and if so, how the PHY and link partner has * flow control configured. */ if ((hw->media_type == e1000_media_type_copper) && hw->autoneg) { /* Read the MII Status Register and check to see if AutoNeg * has completed. We read this twice because this reg has * some "sticky" (latched) bits. 
*/ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; if (mii_status_reg & MII_SR_AUTONEG_COMPLETE) { /* The AutoNeg process has completed, so we now need to * read both the Auto Negotiation Advertisement Register * (Address 4) and the Auto_Negotiation Base Page * Ability Register (Address 5) to determine how flow * control was negotiated. */ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg); if (ret_val) return ret_val; /* Two bits in the Auto Negotiation Advertisement * Register (Address 4) and two bits in the Auto * Negotiation Base Page Ability Register (Address 5) * determine flow control for both the PHY and the link * partner. The following table, taken out of the IEEE * 802.3ab/D6.0 dated March 25, 1999, describes these * PAUSE resolution bits and how flow control is * determined based upon these settings. * NOTE: DC = Don't Care * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution *-------|---------|-------|---------|------------------ * 0 | 0 | DC | DC | E1000_FC_NONE * 0 | 1 | 0 | DC | E1000_FC_NONE * 0 | 1 | 1 | 0 | E1000_FC_NONE * 0 | 1 | 1 | 1 | E1000_FC_TX_PAUSE * 1 | 0 | 0 | DC | E1000_FC_NONE * 1 | DC | 1 | DC | E1000_FC_FULL * 1 | 1 | 0 | 0 | E1000_FC_NONE * 1 | 1 | 0 | 1 | E1000_FC_RX_PAUSE * */ /* Are both PAUSE bits set to 1? If so, this implies * Symmetric Flow Control is enabled at both ends. The * ASM_DIR bits are irrelevant per the spec. * * For Symmetric Flow Control: * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|------------------ * 1 | DC | 1 | DC | E1000_FC_FULL * */ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { /* Now we need to check if the user selected Rx * ONLY of pause frames. In this case, we had * to advertise FULL flow control because we * could not advertise Rx ONLY. Hence, we must * now check to see if we need to turn OFF the * TRANSMISSION of PAUSE frames. */ if (hw->original_fc == E1000_FC_FULL) { hw->fc = E1000_FC_FULL; e_dbg("Flow Control = FULL.\n"); } else { hw->fc = E1000_FC_RX_PAUSE; e_dbg ("Flow Control = RX PAUSE frames only.\n"); } } /* For receiving PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|------------------ * 0 | 1 | 1 | 1 | E1000_FC_TX_PAUSE * */ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc = E1000_FC_TX_PAUSE; e_dbg ("Flow Control = TX PAUSE frames only.\n"); } /* For transmitting PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|------------------ * 1 | 1 | 0 | 1 | E1000_FC_RX_PAUSE * */ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc = E1000_FC_RX_PAUSE; e_dbg ("Flow Control = RX PAUSE frames only.\n"); } /* Per the IEEE spec, at this point flow control should * be disabled. 
However, we want to consider that we * could be connected to a legacy switch that doesn't * advertise desired flow control, but can be forced on * the link partner. So if we advertised no flow * control, that is what we will resolve to. If we * advertised some kind of receive capability (Rx Pause * Only or Full Flow Control) and the link partner * advertised none, we will configure ourselves to * enable Rx Flow Control only. We can do this safely * for two reasons: If the link partner really * didn't want flow control enabled, and we enable Rx, * no harm done since we won't be receiving any PAUSE * frames anyway. If the intent on the link partner was * to have flow control enabled, then by us enabling Rx * only, we can at least receive pause frames and * process them. This is a good idea because in most * cases, since we are predominantly a server NIC, more * times than not we will be asked to delay transmission * of packets than asking our link partner to pause * transmission of frames. */ else if ((hw->original_fc == E1000_FC_NONE || hw->original_fc == E1000_FC_TX_PAUSE) || hw->fc_strict_ieee) { hw->fc = E1000_FC_NONE; e_dbg("Flow Control = NONE.\n"); } else { hw->fc = E1000_FC_RX_PAUSE; e_dbg ("Flow Control = RX PAUSE frames only.\n"); } /* Now we need to do one last check... If we auto- * negotiated to HALF DUPLEX, flow control should not be * enabled per IEEE 802.3 spec. */ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); if (ret_val) { e_dbg ("Error getting link speed and duplex\n"); return ret_val; } if (duplex == HALF_DUPLEX) hw->fc = E1000_FC_NONE; /* Now we call a subroutine to actually force the MAC * controller to use the correct flow control settings. */ ret_val = e1000_force_mac_fc(hw); if (ret_val) { e_dbg ("Error forcing flow control settings\n"); return ret_val; } } else { e_dbg ("Copper PHY and Auto Neg has not completed.\n"); } } return E1000_SUCCESS; } /** * e1000_check_for_serdes_link_generic - Check for link (Serdes) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. */ static s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) { u32 rxcw; u32 ctrl; u32 status; s32 ret_val = E1000_SUCCESS; ctrl = er32(CTRL); status = er32(STATUS); rxcw = er32(RXCW); /* If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), and our link partner is not trying to * auto-negotiate with us (we are receiving idles or data), * we need to force link up. We also need to give auto-negotiation * time to complete. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ if ((!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { if (hw->autoneg_failed == 0) { hw->autoneg_failed = 1; goto out; } e_dbg("NOT RXing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ ew32(TXCW, (hw->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = er32(CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ew32(CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up(hw); if (ret_val) { e_dbg("Error configuring flow control\n"); goto out; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. 
*/ e_dbg("RXing /C/, enable AutoNeg and stop forcing link.\n"); ew32(TXCW, hw->txcw); ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); hw->serdes_has_link = true; } else if (!(E1000_TXCW_ANE & er32(TXCW))) { /* If we force link for non-auto-negotiation switch, check * link status based on MAC synchronization for internal * serdes media type. */ /* SYNCH bit and IV bit are sticky. */ udelay(10); rxcw = er32(RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { hw->serdes_has_link = true; e_dbg("SERDES: Link up - forced.\n"); } } else { hw->serdes_has_link = false; e_dbg("SERDES: Link down - force failed.\n"); } } if (E1000_TXCW_ANE & er32(TXCW)) { status = er32(STATUS); if (status & E1000_STATUS_LU) { /* SYNCH bit and IV bit are sticky, so reread rxcw. */ udelay(10); rxcw = er32(RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { hw->serdes_has_link = true; e_dbg("SERDES: Link up - autoneg " "completed successfully.\n"); } else { hw->serdes_has_link = false; e_dbg("SERDES: Link down - invalid" "codewords detected in autoneg.\n"); } } else { hw->serdes_has_link = false; e_dbg("SERDES: Link down - no sync.\n"); } } else { hw->serdes_has_link = false; e_dbg("SERDES: Link down - autoneg failed\n"); } } out: return ret_val; } /** * e1000_check_for_link * @hw: Struct containing variables accessed by shared code * * Checks to see if the link status of the hardware has changed. * Called by any function that needs to check the link status of the adapter. */ s32 e1000_check_for_link(struct e1000_hw *hw) { u32 status; u32 rctl; u32 icr; s32 ret_val; u16 phy_data; er32(CTRL); status = er32(STATUS); /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be * set when the optics detect a signal. On older adapters, it will be * cleared when there is a signal. This applies to fiber media only. */ if ((hw->media_type == e1000_media_type_fiber) || (hw->media_type == e1000_media_type_internal_serdes)) { er32(RXCW); if (hw->media_type == e1000_media_type_fiber) { if (status & E1000_STATUS_LU) hw->get_link_status = false; } } /* If we have a copper PHY then we only want to go out to the PHY * registers to see if Auto-Neg has completed and/or if our link * status has changed. The get_link_status flag will be set if we * receive a Link Status Change interrupt or we have Rx Sequence * Errors. */ if ((hw->media_type == e1000_media_type_copper) && hw->get_link_status) { /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. * Read the register twice since the link bit is sticky. */ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; if (phy_data & MII_SR_LINK_STATUS) { hw->get_link_status = false; /* Check if there was DownShift, must be checked * immediately after link-up */ e1000_check_downshift(hw); /* If we are on 82544 or 82543 silicon and speed/duplex * are forced to 10H or 10F, then we will implement the * polarity reversal workaround. We disable interrupts * first, and upon returning, place the devices * interrupt state to its previous value except for the * link status change interrupt which will * happen due to the execution of this workaround. 
*/ if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) && (!hw->autoneg) && (hw->forced_speed_duplex == e1000_10_full || hw->forced_speed_duplex == e1000_10_half)) { ew32(IMC, 0xffffffff); ret_val = e1000_polarity_reversal_workaround(hw); icr = er32(ICR); ew32(ICS, (icr & ~E1000_ICS_LSC)); ew32(IMS, IMS_ENABLE_MASK); } } else { /* No link detected */ e1000_config_dsp_after_link_change(hw, false); return 0; } /* If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. */ if (!hw->autoneg) return -E1000_ERR_CONFIG; /* optimize the dsp settings for the igp phy */ e1000_config_dsp_after_link_change(hw, true); /* We have a M88E1000 PHY and Auto-Neg is enabled. If we * have Si on board that is 82544 or newer, Auto * Speed Detection takes care of MAC speed/duplex * configuration. So we only need to configure Collision * Distance in the MAC. Otherwise, we need to force * speed/duplex on the MAC to the current PHY speed/duplex * settings. */ if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100)) e1000_config_collision_dist(hw); else { ret_val = e1000_config_mac_to_phy(hw); if (ret_val) { e_dbg ("Error configuring MAC to PHY settings\n"); return ret_val; } } /* Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control settings * because we may have had to re-autoneg with a different link * partner. */ ret_val = e1000_config_fc_after_link_up(hw); if (ret_val) { e_dbg("Error configuring flow control\n"); return ret_val; } /* At this point we know that we are on copper and we have * auto-negotiated link. These are conditions for checking the * link partner capability register. We use the link speed to * determine if TBI compatibility needs to be turned on or off. * If the link is not at gigabit speed, then TBI compatibility * is not needed. If we are at gigabit speed, we turn on TBI * compatibility. */ if (hw->tbi_compatibility_en) { u16 speed, duplex; ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); if (ret_val) { e_dbg ("Error getting link speed and duplex\n"); return ret_val; } if (speed != SPEED_1000) { /* If link speed is not set to gigabit speed, we * do not need to enable TBI compatibility. */ if (hw->tbi_compatibility_on) { /* If we previously were in the mode, * turn it off. */ rctl = er32(RCTL); rctl &= ~E1000_RCTL_SBP; ew32(RCTL, rctl); hw->tbi_compatibility_on = false; } } else { /* If TBI compatibility is was previously off, * turn it on. For compatibility with a TBI link * partner, we will store bad packets. Some * frames have an additional byte on the end and * will look like CRC errors to the hardware. */ if (!hw->tbi_compatibility_on) { hw->tbi_compatibility_on = true; rctl = er32(RCTL); rctl |= E1000_RCTL_SBP; ew32(RCTL, rctl); } } } } if ((hw->media_type == e1000_media_type_fiber) || (hw->media_type == e1000_media_type_internal_serdes)) e1000_check_for_serdes_link_generic(hw); return E1000_SUCCESS; } /** * e1000_get_speed_and_duplex * @hw: Struct containing variables accessed by shared code * @speed: Speed of the connection * @duplex: Duplex setting of the connection * * Detects the current speed and duplex settings of the hardware. 
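 *
 * Example (illustrative; mirrors how callers earlier in this file use it,
 * the final e_dbg() being a hypothetical consumer of the result):
 *
 *      u16 speed, duplex;
 *
 *      ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex);
 *      if (ret_val)
 *              return ret_val;
 *      if (speed == SPEED_1000 && duplex == FULL_DUPLEX)
 *              e_dbg("Link is 1000 Mb/s, full duplex\n");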
*/ s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) { u32 status; s32 ret_val; u16 phy_data; if (hw->mac_type >= e1000_82543) { status = er32(STATUS); if (status & E1000_STATUS_SPEED_1000) { *speed = SPEED_1000; e_dbg("1000 Mbs, "); } else if (status & E1000_STATUS_SPEED_100) { *speed = SPEED_100; e_dbg("100 Mbs, "); } else { *speed = SPEED_10; e_dbg("10 Mbs, "); } if (status & E1000_STATUS_FD) { *duplex = FULL_DUPLEX; e_dbg("Full Duplex\n"); } else { *duplex = HALF_DUPLEX; e_dbg(" Half Duplex\n"); } } else { e_dbg("1000 Mbs, Full Duplex\n"); *speed = SPEED_1000; *duplex = FULL_DUPLEX; } /* IGP01 PHY may advertise full duplex operation after speed downgrade * even if it is operating at half duplex. Here we set the duplex * settings to match the duplex in the link partner's capabilities. */ if (hw->phy_type == e1000_phy_igp && hw->speed_downgraded) { ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_data); if (ret_val) return ret_val; if (!(phy_data & NWAY_ER_LP_NWAY_CAPS)) *duplex = HALF_DUPLEX; else { ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_data); if (ret_val) return ret_val; if ((*speed == SPEED_100 && !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) || (*speed == SPEED_10 && !(phy_data & NWAY_LPAR_10T_FD_CAPS))) *duplex = HALF_DUPLEX; } } return E1000_SUCCESS; } /** * e1000_wait_autoneg * @hw: Struct containing variables accessed by shared code * * Blocks until autoneg completes or times out (~4.5 seconds) */ static s32 e1000_wait_autoneg(struct e1000_hw *hw) { s32 ret_val; u16 i; u16 phy_data; e_dbg("Waiting for Auto-Neg to complete.\n"); /* We will wait for autoneg to complete or 4.5 seconds to expire. */ for (i = PHY_AUTO_NEG_TIME; i > 0; i--) { /* Read the MII Status Register and wait for Auto-Neg * Complete bit to be set. */ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; if (phy_data & MII_SR_AUTONEG_COMPLETE) return E1000_SUCCESS; msleep(100); } return E1000_SUCCESS; } /** * e1000_raise_mdi_clk - Raises the Management Data Clock * @hw: Struct containing variables accessed by shared code * @ctrl: Device control register's current value */ static void e1000_raise_mdi_clk(struct e1000_hw *hw, u32 *ctrl) { /* Raise the clock input to the Management Data Clock (by setting the * MDC bit), and then delay 10 microseconds. */ ew32(CTRL, (*ctrl | E1000_CTRL_MDC)); E1000_WRITE_FLUSH(); udelay(10); } /** * e1000_lower_mdi_clk - Lowers the Management Data Clock * @hw: Struct containing variables accessed by shared code * @ctrl: Device control register's current value */ static void e1000_lower_mdi_clk(struct e1000_hw *hw, u32 *ctrl) { /* Lower the clock input to the Management Data Clock (by clearing the * MDC bit), and then delay 10 microseconds. */ ew32(CTRL, (*ctrl & ~E1000_CTRL_MDC)); E1000_WRITE_FLUSH(); udelay(10); } /** * e1000_shift_out_mdi_bits - Shifts data bits out to the PHY * @hw: Struct containing variables accessed by shared code * @data: Data to send out to the PHY * @count: Number of bits to shift out * * Bits are shifted out in MSB to LSB order. */ static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, u32 data, u16 count) { u32 ctrl; u32 mask; /* We need to shift "count" number of bits out to the PHY. So, the value * in the "data" parameter will be shifted out to the PHY one bit at a * time. In order to do this, "data" must be broken down into bits. 
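 *
 * Worked example (illustrative): for the 14-bit read command assembled in
 * e1000_read_phy_reg_ex(), count == 14, so the initial mask below is
 * 0x01 << 13 == 0x2000 and bit 13 of "data" is driven onto MDIO first,
 * followed by bit 12 and so on down to bit 0 (MSB-first, as noted above).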
*/ mask = 0x01; mask <<= (count - 1); ctrl = er32(CTRL); /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); while (mask) { /* A "1" is shifted out to the PHY by setting the MDIO bit to * "1" and then raising and lowering the Management Data Clock. * A "0" is shifted out to the PHY by setting the MDIO bit to * "0" and then raising and lowering the clock. */ if (data & mask) ctrl |= E1000_CTRL_MDIO; else ctrl &= ~E1000_CTRL_MDIO; ew32(CTRL, ctrl); E1000_WRITE_FLUSH(); udelay(10); e1000_raise_mdi_clk(hw, &ctrl); e1000_lower_mdi_clk(hw, &ctrl); mask = mask >> 1; } } /** * e1000_shift_in_mdi_bits - Shifts data bits in from the PHY * @hw: Struct containing variables accessed by shared code * * Bits are shifted in MSB to LSB order. */ static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw) { u32 ctrl; u16 data = 0; u8 i; /* In order to read a register from the PHY, we need to shift in a total * of 18 bits from the PHY. The first two bit (turnaround) times are * used to avoid contention on the MDIO pin when a read operation is * performed. These two bits are ignored by us and thrown away. Bits are * "shifted in" by raising the input to the Management Data Clock * (setting the MDC bit), and then reading the value of the MDIO bit. */ ctrl = er32(CTRL); /* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as * input. */ ctrl &= ~E1000_CTRL_MDIO_DIR; ctrl &= ~E1000_CTRL_MDIO; ew32(CTRL, ctrl); E1000_WRITE_FLUSH(); /* Raise and Lower the clock before reading in the data. This accounts * for the turnaround bits. The first clock occurred when we clocked out * the last bit of the Register Address. */ e1000_raise_mdi_clk(hw, &ctrl); e1000_lower_mdi_clk(hw, &ctrl); for (data = 0, i = 0; i < 16; i++) { data = data << 1; e1000_raise_mdi_clk(hw, &ctrl); ctrl = er32(CTRL); /* Check to see if we shifted in a "1". */ if (ctrl & E1000_CTRL_MDIO) data |= 1; e1000_lower_mdi_clk(hw, &ctrl); } e1000_raise_mdi_clk(hw, &ctrl); e1000_lower_mdi_clk(hw, &ctrl); return data; } /** * e1000_read_phy_reg - read a phy register * @hw: Struct containing variables accessed by shared code * @reg_addr: address of the PHY register to read * @phy_data: pointer to the value on the PHY register * * Reads the value from a PHY register, if the value is on a specific non zero * page, sets the page first. */ s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data) { u32 ret_val; unsigned long flags; spin_lock_irqsave(&e1000_phy_lock, flags); if ((hw->phy_type == e1000_phy_igp) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (u16) reg_addr); if (ret_val) goto out; } ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); out: spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; } static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data) { u32 i; u32 mdic = 0; const u32 phy_addr = (hw->mac_type == e1000_ce4100) ? hw->phy_addr : 1; if (reg_addr > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", reg_addr); return -E1000_ERR_PARAM; } if (hw->mac_type > e1000_82543) { /* Set up Op-code, Phy Address, and register address in the MDI * Control register. The MAC will take care of interfacing with * the PHY to retrieve the desired data. 
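 *
 * Three access paths follow: CE4100 parts go through the dedicated
 * E1000_MDIO_CMD/E1000_MDIO_STS interface, other MACs newer than the
 * 82543 use the MDIC register and poll E1000_MDIC_READY, and older parts
 * fall back to bit-banging the management interface with
 * e1000_shift_out_mdi_bits()/e1000_shift_in_mdi_bits().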
*/ if (hw->mac_type == e1000_ce4100) { mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | (INTEL_CE_GBE_MDIC_OP_READ) | (INTEL_CE_GBE_MDIC_GO)); writel(mdic, E1000_MDIO_CMD); /* Poll the ready bit to see if the MDI read * completed */ for (i = 0; i < 64; i++) { udelay(50); mdic = readl(E1000_MDIO_CMD); if (!(mdic & INTEL_CE_GBE_MDIC_GO)) break; } if (mdic & INTEL_CE_GBE_MDIC_GO) { e_dbg("MDI Read did not complete\n"); return -E1000_ERR_PHY; } mdic = readl(E1000_MDIO_STS); if (mdic & INTEL_CE_GBE_MDIC_READ_ERROR) { e_dbg("MDI Read Error\n"); return -E1000_ERR_PHY; } *phy_data = (u16)mdic; } else { mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | (E1000_MDIC_OP_READ)); ew32(MDIC, mdic); /* Poll the ready bit to see if the MDI read * completed */ for (i = 0; i < 64; i++) { udelay(50); mdic = er32(MDIC); if (mdic & E1000_MDIC_READY) break; } if (!(mdic & E1000_MDIC_READY)) { e_dbg("MDI Read did not complete\n"); return -E1000_ERR_PHY; } if (mdic & E1000_MDIC_ERROR) { e_dbg("MDI Error\n"); return -E1000_ERR_PHY; } *phy_data = (u16)mdic; } } else { /* We must first send a preamble through the MDIO pin to signal * the beginning of an MII instruction. This is done by sending * 32 consecutive "1" bits. */ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); /* Now combine the next few fields that are required for a read * operation. We use this method instead of calling the * e1000_shift_out_mdi_bits routine five different times. The * format of a MII read instruction consists of a shift out of * 14 bits and is defined as follows: * <Preamble><SOF><Op Code><Phy Addr><Reg Addr> * followed by a shift in of 18 bits. This first two bits * shifted in are TurnAround bits used to avoid contention on * the MDIO pin when a READ operation is performed. These two * bits are thrown away followed by a shift in of 16 bits which * contains the desired data. */ mdic = ((reg_addr) | (phy_addr << 5) | (PHY_OP_READ << 10) | (PHY_SOF << 12)); e1000_shift_out_mdi_bits(hw, mdic, 14); /* Now that we've shifted out the read command to the MII, we * need to "shift in" the 16-bit value (18 total bits) of the * requested PHY register address. */ *phy_data = e1000_shift_in_mdi_bits(hw); } return E1000_SUCCESS; } /** * e1000_write_phy_reg - write a phy register * * @hw: Struct containing variables accessed by shared code * @reg_addr: address of the PHY register to write * @phy_data: data to write to the PHY * * Writes a value to a PHY register */ s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 phy_data) { u32 ret_val; unsigned long flags; spin_lock_irqsave(&e1000_phy_lock, flags); if ((hw->phy_type == e1000_phy_igp) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (u16)reg_addr); if (ret_val) { spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; } } ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; } static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, u16 phy_data) { u32 i; u32 mdic = 0; const u32 phy_addr = (hw->mac_type == e1000_ce4100) ? hw->phy_addr : 1; if (reg_addr > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", reg_addr); return -E1000_ERR_PARAM; } if (hw->mac_type > e1000_82543) { /* Set up Op-code, Phy Address, register address, and data * intended for the PHY register in the MDI Control register. 
* The MAC will take care of interfacing with the PHY to send * the desired data. */ if (hw->mac_type == e1000_ce4100) { mdic = (((u32)phy_data) | (reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | (INTEL_CE_GBE_MDIC_OP_WRITE) | (INTEL_CE_GBE_MDIC_GO)); writel(mdic, E1000_MDIO_CMD); /* Poll the ready bit to see if the MDI read * completed */ for (i = 0; i < 640; i++) { udelay(5); mdic = readl(E1000_MDIO_CMD); if (!(mdic & INTEL_CE_GBE_MDIC_GO)) break; } if (mdic & INTEL_CE_GBE_MDIC_GO) { e_dbg("MDI Write did not complete\n"); return -E1000_ERR_PHY; } } else { mdic = (((u32)phy_data) | (reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | (E1000_MDIC_OP_WRITE)); ew32(MDIC, mdic); /* Poll the ready bit to see if the MDI read * completed */ for (i = 0; i < 641; i++) { udelay(5); mdic = er32(MDIC); if (mdic & E1000_MDIC_READY) break; } if (!(mdic & E1000_MDIC_READY)) { e_dbg("MDI Write did not complete\n"); return -E1000_ERR_PHY; } } } else { /* We'll need to use the SW defined pins to shift the write * command out to the PHY. We first send a preamble to the PHY * to signal the beginning of the MII instruction. This is done * by sending 32 consecutive "1" bits. */ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); /* Now combine the remaining required fields that will indicate * a write operation. We use this method instead of calling the * e1000_shift_out_mdi_bits routine for each field in the * command. The format of a MII write instruction is as follows: * <Preamble><SOF><OpCode><PhyAddr><RegAddr><Turnaround><Data>. */ mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) | (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); mdic <<= 16; mdic |= (u32)phy_data; e1000_shift_out_mdi_bits(hw, mdic, 32); } return E1000_SUCCESS; } /** * e1000_phy_hw_reset - reset the phy, hardware style * @hw: Struct containing variables accessed by shared code * * Returns the PHY to the power-on reset state */ s32 e1000_phy_hw_reset(struct e1000_hw *hw) { u32 ctrl, ctrl_ext; u32 led_ctrl; e_dbg("Resetting Phy...\n"); if (hw->mac_type > e1000_82543) { /* Read the device control register and assert the * E1000_CTRL_PHY_RST bit. Then, take it out of reset. * For e1000 hardware, we delay for 10ms between the assert * and de-assert. */ ctrl = er32(CTRL); ew32(CTRL, ctrl | E1000_CTRL_PHY_RST); E1000_WRITE_FLUSH(); msleep(10); ew32(CTRL, ctrl); E1000_WRITE_FLUSH(); } else { /* Read the Extended Device Control Register, assert the * PHY_RESET_DIR bit to put the PHY into reset. Then, take it * out of reset. */ ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; ew32(CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(); msleep(10); ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; ew32(CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(); } udelay(150); if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { /* Configure activity LED after PHY reset */ led_ctrl = er32(LEDCTL); led_ctrl &= IGP_ACTIVITY_LED_MASK; led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ew32(LEDCTL, led_ctrl); } /* Wait for FW to finish PHY configuration. 
*/ return e1000_get_phy_cfg_done(hw); } /** * e1000_phy_reset - reset the phy to commit settings * @hw: Struct containing variables accessed by shared code * * Resets the PHY * Sets bit 15 of the MII Control register */ s32 e1000_phy_reset(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; switch (hw->phy_type) { case e1000_phy_igp: ret_val = e1000_phy_hw_reset(hw); if (ret_val) return ret_val; break; default: ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= MII_CR_RESET; ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); if (ret_val) return ret_val; udelay(1); break; } if (hw->phy_type == e1000_phy_igp) e1000_phy_init_script(hw); return E1000_SUCCESS; } /** * e1000_detect_gig_phy - check the phy type * @hw: Struct containing variables accessed by shared code * * Probes the expected PHY address for known PHY IDs */ static s32 e1000_detect_gig_phy(struct e1000_hw *hw) { s32 phy_init_status, ret_val; u16 phy_id_high, phy_id_low; bool match = false; if (hw->phy_id != 0) return E1000_SUCCESS; /* Read the PHY ID Registers to identify which PHY is onboard. */ ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high); if (ret_val) return ret_val; hw->phy_id = (u32)(phy_id_high << 16); udelay(20); ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low); if (ret_val) return ret_val; hw->phy_id |= (u32)(phy_id_low & PHY_REVISION_MASK); hw->phy_revision = (u32)phy_id_low & ~PHY_REVISION_MASK; switch (hw->mac_type) { case e1000_82543: if (hw->phy_id == M88E1000_E_PHY_ID) match = true; break; case e1000_82544: if (hw->phy_id == M88E1000_I_PHY_ID) match = true; break; case e1000_82540: case e1000_82545: case e1000_82545_rev_3: case e1000_82546: case e1000_82546_rev_3: if (hw->phy_id == M88E1011_I_PHY_ID) match = true; break; case e1000_ce4100: if ((hw->phy_id == RTL8211B_PHY_ID) || (hw->phy_id == RTL8201N_PHY_ID) || (hw->phy_id == M88E1118_E_PHY_ID)) match = true; break; case e1000_82541: case e1000_82541_rev_2: case e1000_82547: case e1000_82547_rev_2: if (hw->phy_id == IGP01E1000_I_PHY_ID) match = true; break; default: e_dbg("Invalid MAC type %d\n", hw->mac_type); return -E1000_ERR_CONFIG; } phy_init_status = e1000_set_phy_type(hw); if ((match) && (phy_init_status == E1000_SUCCESS)) { e_dbg("PHY ID 0x%X detected\n", hw->phy_id); return E1000_SUCCESS; } e_dbg("Invalid PHY ID 0x%X\n", hw->phy_id); return -E1000_ERR_PHY; } /** * e1000_phy_reset_dsp - reset DSP * @hw: Struct containing variables accessed by shared code * * Resets the PHY's DSP */ static s32 e1000_phy_reset_dsp(struct e1000_hw *hw) { s32 ret_val; do { ret_val = e1000_write_phy_reg(hw, 29, 0x001d); if (ret_val) break; ret_val = e1000_write_phy_reg(hw, 30, 0x00c1); if (ret_val) break; ret_val = e1000_write_phy_reg(hw, 30, 0x0000); if (ret_val) break; ret_val = E1000_SUCCESS; } while (0); return ret_val; } /** * e1000_phy_igp_get_info - get igp specific registers * @hw: Struct containing variables accessed by shared code * @phy_info: PHY information structure * * Get PHY information from various PHY registers for igp PHY only. */ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info) { s32 ret_val; u16 phy_data, min_length, max_length, average; e1000_rev_polarity polarity; /* The downshift status is checked only once, after link is established, * and it stored in the hw->speed_downgraded parameter. */ phy_info->downshift = (e1000_downshift) hw->speed_downgraded; /* IGP01E1000 does not need to support it. 
*/ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal; /* IGP01E1000 always correct polarity reversal */ phy_info->polarity_correction = e1000_polarity_reversal_enabled; /* Check polarity status */ ret_val = e1000_check_polarity(hw, &polarity); if (ret_val) return ret_val; phy_info->cable_polarity = polarity; ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &phy_data); if (ret_val) return ret_val; phy_info->mdix_mode = (e1000_auto_x_mode)FIELD_GET(IGP01E1000_PSSR_MDIX, phy_data); if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == IGP01E1000_PSSR_SPEED_1000MBPS) { /* Local/Remote Receiver Information are only valid @ 1000 * Mbps */ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) return ret_val; phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; /* Get cable length */ ret_val = e1000_get_cable_length(hw, &min_length, &max_length); if (ret_val) return ret_val; /* Translate to old method */ average = (max_length + min_length) / 2; if (average <= e1000_igp_cable_length_50) phy_info->cable_length = e1000_cable_length_50; else if (average <= e1000_igp_cable_length_80) phy_info->cable_length = e1000_cable_length_50_80; else if (average <= e1000_igp_cable_length_110) phy_info->cable_length = e1000_cable_length_80_110; else if (average <= e1000_igp_cable_length_140) phy_info->cable_length = e1000_cable_length_110_140; else phy_info->cable_length = e1000_cable_length_140; } return E1000_SUCCESS; } /** * e1000_phy_m88_get_info - get m88 specific registers * @hw: Struct containing variables accessed by shared code * @phy_info: PHY information structure * * Get PHY information from various PHY registers for m88 PHY only. */ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info) { s32 ret_val; u16 phy_data; e1000_rev_polarity polarity; /* The downshift status is checked only once, after link is established, * and it stored in the hw->speed_downgraded parameter. */ phy_info->downshift = (e1000_downshift) hw->speed_downgraded; ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_info->extended_10bt_distance = FIELD_GET(M88E1000_PSCR_10BT_EXT_DIST_ENABLE, phy_data) ? e1000_10bt_ext_dist_enable_lower : e1000_10bt_ext_dist_enable_normal; phy_info->polarity_correction = FIELD_GET(M88E1000_PSCR_POLARITY_REVERSAL, phy_data) ? e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled; /* Check polarity status */ ret_val = e1000_check_polarity(hw, &polarity); if (ret_val) return ret_val; phy_info->cable_polarity = polarity; ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; phy_info->mdix_mode = (e1000_auto_x_mode)FIELD_GET(M88E1000_PSSR_MDIX, phy_data); if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { /* Cable Length Estimation and Local/Remote Receiver Information * are only valid at 1000 Mbps. */ phy_info->cable_length = (e1000_cable_length)FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) return ret_val; phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, phy_data) ? 
e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; } return E1000_SUCCESS; } /** * e1000_phy_get_info - request phy info * @hw: Struct containing variables accessed by shared code * @phy_info: PHY information structure * * Get PHY information from various PHY registers */ s32 e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info) { s32 ret_val; u16 phy_data; phy_info->cable_length = e1000_cable_length_undefined; phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined; phy_info->cable_polarity = e1000_rev_polarity_undefined; phy_info->downshift = e1000_downshift_undefined; phy_info->polarity_correction = e1000_polarity_reversal_undefined; phy_info->mdix_mode = e1000_auto_x_mode_undefined; phy_info->local_rx = e1000_1000t_rx_status_undefined; phy_info->remote_rx = e1000_1000t_rx_status_undefined; if (hw->media_type != e1000_media_type_copper) { e_dbg("PHY info is only valid for copper media\n"); return -E1000_ERR_CONFIG; } ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; if ((phy_data & MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) { e_dbg("PHY info is only valid if link is up\n"); return -E1000_ERR_CONFIG; } if (hw->phy_type == e1000_phy_igp) return e1000_phy_igp_get_info(hw, phy_info); else if ((hw->phy_type == e1000_phy_8211) || (hw->phy_type == e1000_phy_8201)) return E1000_SUCCESS; else return e1000_phy_m88_get_info(hw, phy_info); } s32 e1000_validate_mdi_setting(struct e1000_hw *hw) { if (!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) { e_dbg("Invalid MDI setting detected\n"); hw->mdix = 1; return -E1000_ERR_CONFIG; } return E1000_SUCCESS; } /** * e1000_init_eeprom_params - initialize sw eeprom vars * @hw: Struct containing variables accessed by shared code * * Sets up eeprom variables in the hw struct. Must be called after mac_type * is configured. */ s32 e1000_init_eeprom_params(struct e1000_hw *hw) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 eecd = er32(EECD); s32 ret_val = E1000_SUCCESS; u16 eeprom_size; switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: case e1000_82544: eeprom->type = e1000_eeprom_microwire; eeprom->word_size = 64; eeprom->opcode_bits = 3; eeprom->address_bits = 6; eeprom->delay_usec = 50; break; case e1000_82540: case e1000_82545: case e1000_82545_rev_3: case e1000_82546: case e1000_82546_rev_3: eeprom->type = e1000_eeprom_microwire; eeprom->opcode_bits = 3; eeprom->delay_usec = 50; if (eecd & E1000_EECD_SIZE) { eeprom->word_size = 256; eeprom->address_bits = 8; } else { eeprom->word_size = 64; eeprom->address_bits = 6; } break; case e1000_82541: case e1000_82541_rev_2: case e1000_82547: case e1000_82547_rev_2: if (eecd & E1000_EECD_TYPE) { eeprom->type = e1000_eeprom_spi; eeprom->opcode_bits = 8; eeprom->delay_usec = 1; if (eecd & E1000_EECD_ADDR_BITS) { eeprom->page_size = 32; eeprom->address_bits = 16; } else { eeprom->page_size = 8; eeprom->address_bits = 8; } } else { eeprom->type = e1000_eeprom_microwire; eeprom->opcode_bits = 3; eeprom->delay_usec = 50; if (eecd & E1000_EECD_ADDR_BITS) { eeprom->word_size = 256; eeprom->address_bits = 8; } else { eeprom->word_size = 64; eeprom->address_bits = 6; } } break; default: break; } if (eeprom->type == e1000_eeprom_spi) { /* eeprom_size will be an enum [0..8] that maps to eeprom sizes * 128B to 32KB (incremented by powers of 2). */ /* Set to default value for initial eeprom read. 
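		 *
		 * Worked example of the sizing below (illustrative, not
		 * normative): a size field of 0 in the EEPROM_CFG word yields
		 * 1 << EEPROM_WORD_SIZE_SHIFT words, while a non-zero field n
		 * is bumped past the unsupported 256B encoding and yields
		 * 1 << (n + 1 + EEPROM_WORD_SIZE_SHIFT) words.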
*/ eeprom->word_size = 64; ret_val = e1000_read_eeprom(hw, EEPROM_CFG, 1, &eeprom_size); if (ret_val) return ret_val; eeprom_size = FIELD_GET(EEPROM_SIZE_MASK, eeprom_size); /* 256B eeprom size was not supported in earlier hardware, so we * bump eeprom_size up one to ensure that "1" (which maps to * 256B) is never the result used in the shifting logic below. */ if (eeprom_size) eeprom_size++; eeprom->word_size = 1 << (eeprom_size + EEPROM_WORD_SIZE_SHIFT); } return ret_val; } /** * e1000_raise_ee_clk - Raises the EEPROM's clock input. * @hw: Struct containing variables accessed by shared code * @eecd: EECD's current value */ static void e1000_raise_ee_clk(struct e1000_hw *hw, u32 *eecd) { /* Raise the clock input to the EEPROM (by setting the SK bit), and then * wait <delay> microseconds. */ *eecd = *eecd | E1000_EECD_SK; ew32(EECD, *eecd); E1000_WRITE_FLUSH(); udelay(hw->eeprom.delay_usec); } /** * e1000_lower_ee_clk - Lowers the EEPROM's clock input. * @hw: Struct containing variables accessed by shared code * @eecd: EECD's current value */ static void e1000_lower_ee_clk(struct e1000_hw *hw, u32 *eecd) { /* Lower the clock input to the EEPROM (by clearing the SK bit), and * then wait 50 microseconds. */ *eecd = *eecd & ~E1000_EECD_SK; ew32(EECD, *eecd); E1000_WRITE_FLUSH(); udelay(hw->eeprom.delay_usec); } /** * e1000_shift_out_ee_bits - Shift data bits out to the EEPROM. * @hw: Struct containing variables accessed by shared code * @data: data to send to the EEPROM * @count: number of bits to shift out */ static void e1000_shift_out_ee_bits(struct e1000_hw *hw, u16 data, u16 count) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 eecd; u32 mask; /* We need to shift "count" bits out to the EEPROM. So, value in the * "data" parameter will be shifted out to the EEPROM one bit at a time. * In order to do this, "data" must be broken down into bits. */ mask = 0x01 << (count - 1); eecd = er32(EECD); if (eeprom->type == e1000_eeprom_microwire) eecd &= ~E1000_EECD_DO; else if (eeprom->type == e1000_eeprom_spi) eecd |= E1000_EECD_DO; do { /* A "1" is shifted out to the EEPROM by setting bit "DI" to a * "1", and then raising and then lowering the clock (the SK bit * controls the clock input to the EEPROM). A "0" is shifted * out to the EEPROM by setting "DI" to "0" and then raising and * then lowering the clock. */ eecd &= ~E1000_EECD_DI; if (data & mask) eecd |= E1000_EECD_DI; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); e1000_raise_ee_clk(hw, &eecd); e1000_lower_ee_clk(hw, &eecd); mask = mask >> 1; } while (mask); /* We leave the "DI" bit set to "0" when we leave this routine. */ eecd &= ~E1000_EECD_DI; ew32(EECD, eecd); } /** * e1000_shift_in_ee_bits - Shift data bits in from the EEPROM * @hw: Struct containing variables accessed by shared code * @count: number of bits to shift in */ static u16 e1000_shift_in_ee_bits(struct e1000_hw *hw, u16 count) { u32 eecd; u32 i; u16 data; /* In order to read a register from the EEPROM, we need to shift 'count' * bits in from the EEPROM. Bits are "shifted in" by raising the clock * input to the EEPROM (setting the SK bit), and then reading the value * of the "DO" bit. During this "shifting in" process the "DI" bit * should always be clear. 
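	 *
	 * For example (illustrative only): clocking in the four bits
	 * 1, 0, 1, 1 in that order accumulates data as
	 * 0b1 -> 0b10 -> 0b101 -> 0b1011, i.e. the first bit read ends up
	 * in the most significant position of the result.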
*/ eecd = er32(EECD); eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); data = 0; for (i = 0; i < count; i++) { data = data << 1; e1000_raise_ee_clk(hw, &eecd); eecd = er32(EECD); eecd &= ~(E1000_EECD_DI); if (eecd & E1000_EECD_DO) data |= 1; e1000_lower_ee_clk(hw, &eecd); } return data; } /** * e1000_acquire_eeprom - Prepares EEPROM for access * @hw: Struct containing variables accessed by shared code * * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This * function should be called before issuing a command to the EEPROM. */ static s32 e1000_acquire_eeprom(struct e1000_hw *hw) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 eecd, i = 0; eecd = er32(EECD); /* Request EEPROM Access */ if (hw->mac_type > e1000_82544) { eecd |= E1000_EECD_REQ; ew32(EECD, eecd); eecd = er32(EECD); while ((!(eecd & E1000_EECD_GNT)) && (i < E1000_EEPROM_GRANT_ATTEMPTS)) { i++; udelay(5); eecd = er32(EECD); } if (!(eecd & E1000_EECD_GNT)) { eecd &= ~E1000_EECD_REQ; ew32(EECD, eecd); e_dbg("Could not acquire EEPROM grant\n"); return -E1000_ERR_EEPROM; } } /* Setup EEPROM for Read/Write */ if (eeprom->type == e1000_eeprom_microwire) { /* Clear SK and DI */ eecd &= ~(E1000_EECD_DI | E1000_EECD_SK); ew32(EECD, eecd); /* Set CS */ eecd |= E1000_EECD_CS; ew32(EECD, eecd); } else if (eeprom->type == e1000_eeprom_spi) { /* Clear SK and CS */ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(1); } return E1000_SUCCESS; } /** * e1000_standby_eeprom - Returns EEPROM to a "standby" state * @hw: Struct containing variables accessed by shared code */ static void e1000_standby_eeprom(struct e1000_hw *hw) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 eecd; eecd = er32(EECD); if (eeprom->type == e1000_eeprom_microwire) { eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); /* Clock high */ eecd |= E1000_EECD_SK; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); /* Select EEPROM */ eecd |= E1000_EECD_CS; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); /* Clock low */ eecd &= ~E1000_EECD_SK; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); } else if (eeprom->type == e1000_eeprom_spi) { /* Toggle CS to flush commands */ eecd |= E1000_EECD_CS; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); eecd &= ~E1000_EECD_CS; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(eeprom->delay_usec); } } /** * e1000_release_eeprom - drop chip select * @hw: Struct containing variables accessed by shared code * * Terminates a command by inverting the EEPROM's chip select pin */ static void e1000_release_eeprom(struct e1000_hw *hw) { u32 eecd; eecd = er32(EECD); if (hw->eeprom.type == e1000_eeprom_spi) { eecd |= E1000_EECD_CS; /* Pull CS high */ eecd &= ~E1000_EECD_SK; /* Lower SCK */ ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(hw->eeprom.delay_usec); } else if (hw->eeprom.type == e1000_eeprom_microwire) { /* cleanup eeprom */ /* CS on Microwire is active-high */ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); ew32(EECD, eecd); /* Rising edge of clock */ eecd |= E1000_EECD_SK; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(hw->eeprom.delay_usec); /* Falling edge of clock */ eecd &= ~E1000_EECD_SK; ew32(EECD, eecd); E1000_WRITE_FLUSH(); udelay(hw->eeprom.delay_usec); } /* Stop requesting EEPROM access */ if (hw->mac_type > e1000_82544) { eecd &= ~E1000_EECD_REQ; ew32(EECD, eecd); } } /** * e1000_spi_eeprom_ready - Reads a 16 bit word from the EEPROM. 
* @hw: Struct containing variables accessed by shared code */ static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw) { u16 retry_count = 0; u8 spi_stat_reg; /* Read "Status Register" repeatedly until the LSB is cleared. The * EEPROM will signal that the command has been completed by clearing * bit 0 of the internal status register. If it's not cleared within * 5 milliseconds, then error out. */ retry_count = 0; do { e1000_shift_out_ee_bits(hw, EEPROM_RDSR_OPCODE_SPI, hw->eeprom.opcode_bits); spi_stat_reg = (u8)e1000_shift_in_ee_bits(hw, 8); if (!(spi_stat_reg & EEPROM_STATUS_RDY_SPI)) break; udelay(5); retry_count += 5; e1000_standby_eeprom(hw); } while (retry_count < EEPROM_MAX_RETRY_SPI); /* ATMEL SPI write time could vary from 0-20mSec on 3.3V devices (and * only 0-5mSec on 5V devices) */ if (retry_count >= EEPROM_MAX_RETRY_SPI) { e_dbg("SPI EEPROM Status error\n"); return -E1000_ERR_EEPROM; } return E1000_SUCCESS; } /** * e1000_read_eeprom - Reads a 16 bit word from the EEPROM. * @hw: Struct containing variables accessed by shared code * @offset: offset of word in the EEPROM to read * @data: word read from the EEPROM * @words: number of words to read */ s32 e1000_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 ret; mutex_lock(&e1000_eeprom_lock); ret = e1000_do_read_eeprom(hw, offset, words, data); mutex_unlock(&e1000_eeprom_lock); return ret; } static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 i = 0; if (hw->mac_type == e1000_ce4100) { GBE_CONFIG_FLASH_READ(GBE_CONFIG_BASE_VIRT, offset, words, data); return E1000_SUCCESS; } /* A check for invalid values: offset too large, too many words, and * not enough words. */ if ((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) || (words == 0)) { e_dbg("\"words\" parameter out of bounds. Words = %d," "size = %d\n", offset, eeprom->word_size); return -E1000_ERR_EEPROM; } /* EEPROM's that don't use EERD to read require us to bit-bang the SPI * directly. In this case, we need to acquire the EEPROM so that * FW or other port software does not interrupt. */ /* Prepare the EEPROM for bit-bang reading */ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) return -E1000_ERR_EEPROM; /* Set up the SPI or Microwire EEPROM for bit-bang reading. We have * acquired the EEPROM at this point, so any returns should release it */ if (eeprom->type == e1000_eeprom_spi) { u16 word_in; u8 read_opcode = EEPROM_READ_OPCODE_SPI; if (e1000_spi_eeprom_ready(hw)) { e1000_release_eeprom(hw); return -E1000_ERR_EEPROM; } e1000_standby_eeprom(hw); /* Some SPI eeproms use the 8th address bit embedded in the * opcode */ if ((eeprom->address_bits == 8) && (offset >= 128)) read_opcode |= EEPROM_A8_OPCODE_SPI; /* Send the READ command (opcode + addr) */ e1000_shift_out_ee_bits(hw, read_opcode, eeprom->opcode_bits); e1000_shift_out_ee_bits(hw, (u16)(offset * 2), eeprom->address_bits); /* Read the data. The address of the eeprom internally * increments with each byte (spi) being read, saving on the * overhead of eeprom setup and tear-down. The address counter * will roll over if reading beyond the size of the eeprom, thus * allowing the entire memory to be read starting from any * offset. 
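		 *
		 * The byte swap below accounts for the EEPROM streaming the
		 * low byte of each word first; e.g. (illustrative) a stored
		 * word of 0x8086 arrives as the bytes 0x86, 0x80, so word_in
		 * reads back as 0x8680 and the swap restores 0x8086.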
*/ for (i = 0; i < words; i++) { word_in = e1000_shift_in_ee_bits(hw, 16); data[i] = (word_in >> 8) | (word_in << 8); } } else if (eeprom->type == e1000_eeprom_microwire) { for (i = 0; i < words; i++) { /* Send the READ command (opcode + addr) */ e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE_MICROWIRE, eeprom->opcode_bits); e1000_shift_out_ee_bits(hw, (u16)(offset + i), eeprom->address_bits); /* Read the data. For microwire, each word requires the * overhead of eeprom setup and tear-down. */ data[i] = e1000_shift_in_ee_bits(hw, 16); e1000_standby_eeprom(hw); cond_resched(); } } /* End this read operation */ e1000_release_eeprom(hw); return E1000_SUCCESS; } /** * e1000_validate_eeprom_checksum - Verifies that the EEPROM has a valid checksum * @hw: Struct containing variables accessed by shared code * * Reads the first 64 16 bit words of the EEPROM and sums the values read. * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is * valid. */ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw) { u16 checksum = 0; u16 i, eeprom_data; for (i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) { if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) { e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } checksum += eeprom_data; } #ifdef CONFIG_PARISC /* This is a signature and not a checksum on HP c8000 */ if ((hw->subsystem_vendor_id == 0x103C) && (eeprom_data == 0x16d6)) return E1000_SUCCESS; #endif if (checksum == (u16)EEPROM_SUM) return E1000_SUCCESS; else { e_dbg("EEPROM Checksum Invalid\n"); return -E1000_ERR_EEPROM; } } /** * e1000_update_eeprom_checksum - Calculates/writes the EEPROM checksum * @hw: Struct containing variables accessed by shared code * * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA. * Writes the difference to word offset 63 of the EEPROM. */ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw) { u16 checksum = 0; u16 i, eeprom_data; for (i = 0; i < EEPROM_CHECKSUM_REG; i++) { if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) { e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } checksum += eeprom_data; } checksum = (u16)EEPROM_SUM - checksum; if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) { e_dbg("EEPROM Write Error\n"); return -E1000_ERR_EEPROM; } return E1000_SUCCESS; } /** * e1000_write_eeprom - write words to the different EEPROM types. * @hw: Struct containing variables accessed by shared code * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: 16 bit word to be written to the EEPROM * * If e1000_update_eeprom_checksum is not called after this function, the * EEPROM will most likely contain an invalid checksum. */ s32 e1000_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 ret; mutex_lock(&e1000_eeprom_lock); ret = e1000_do_write_eeprom(hw, offset, words, data); mutex_unlock(&e1000_eeprom_lock); return ret; } static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_eeprom_info *eeprom = &hw->eeprom; s32 status = 0; if (hw->mac_type == e1000_ce4100) { GBE_CONFIG_FLASH_WRITE(GBE_CONFIG_BASE_VIRT, offset, words, data); return E1000_SUCCESS; } /* A check for invalid values: offset too large, too many words, and * not enough words. 
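	 * For instance (illustrative numbers): on a 64-word part, offset 60
	 * with words = 8 is rejected because only 64 - 60 = 4 words remain,
	 * and words = 0 is rejected outright.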
*/ if ((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) || (words == 0)) { e_dbg("\"words\" parameter out of bounds\n"); return -E1000_ERR_EEPROM; } /* Prepare the EEPROM for writing */ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) return -E1000_ERR_EEPROM; if (eeprom->type == e1000_eeprom_microwire) { status = e1000_write_eeprom_microwire(hw, offset, words, data); } else { status = e1000_write_eeprom_spi(hw, offset, words, data); msleep(10); } /* Done with writing */ e1000_release_eeprom(hw); return status; } /** * e1000_write_eeprom_spi - Writes a 16 bit word to a given offset in an SPI EEPROM. * @hw: Struct containing variables accessed by shared code * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: pointer to array of 8 bit words to be written to the EEPROM */ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u16 widx = 0; while (widx < words) { u8 write_opcode = EEPROM_WRITE_OPCODE_SPI; if (e1000_spi_eeprom_ready(hw)) return -E1000_ERR_EEPROM; e1000_standby_eeprom(hw); cond_resched(); /* Send the WRITE ENABLE command (8 bit opcode ) */ e1000_shift_out_ee_bits(hw, EEPROM_WREN_OPCODE_SPI, eeprom->opcode_bits); e1000_standby_eeprom(hw); /* Some SPI eeproms use the 8th address bit embedded in the * opcode */ if ((eeprom->address_bits == 8) && (offset >= 128)) write_opcode |= EEPROM_A8_OPCODE_SPI; /* Send the Write command (8-bit opcode + addr) */ e1000_shift_out_ee_bits(hw, write_opcode, eeprom->opcode_bits); e1000_shift_out_ee_bits(hw, (u16)((offset + widx) * 2), eeprom->address_bits); /* Send the data */ /* Loop to allow for up to whole page write (32 bytes) of * eeprom */ while (widx < words) { u16 word_out = data[widx]; word_out = (word_out >> 8) | (word_out << 8); e1000_shift_out_ee_bits(hw, word_out, 16); widx++; /* Some larger eeprom sizes are capable of a 32-byte * PAGE WRITE operation, while the smaller eeproms are * capable of an 8-byte PAGE WRITE operation. Break the * inner loop to pass new address */ if ((((offset + widx) * 2) % eeprom->page_size) == 0) { e1000_standby_eeprom(hw); break; } } } return E1000_SUCCESS; } /** * e1000_write_eeprom_microwire - Writes a 16 bit word to a given offset in a Microwire EEPROM. * @hw: Struct containing variables accessed by shared code * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: pointer to array of 8 bit words to be written to the EEPROM */ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 eecd; u16 words_written = 0; u16 i = 0; /* Send the write enable command to the EEPROM (3-bit opcode plus * 6/8-bit dummy address beginning with 11). It's less work to include * the 11 of the dummy address as part of the opcode than it is to shift * it over the correct number of bits for the address. This puts the * EEPROM into write/erase mode. 
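	 *
	 * As a concrete (illustrative) example: on a 64-word part with 6
	 * address bits, this clocks out opcode_bits + 2 = 5 bits covering
	 * the opcode and the leading 11 of the dummy address (1 00 11 on
	 * the common 93CXX parts), followed by address_bits - 2 = 4 zero
	 * bits to pad out the frame.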
*/ e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE_MICROWIRE, (u16)(eeprom->opcode_bits + 2)); e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2)); /* Prepare the EEPROM */ e1000_standby_eeprom(hw); while (words_written < words) { /* Send the Write command (3-bit opcode + addr) */ e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE_MICROWIRE, eeprom->opcode_bits); e1000_shift_out_ee_bits(hw, (u16)(offset + words_written), eeprom->address_bits); /* Send the data */ e1000_shift_out_ee_bits(hw, data[words_written], 16); /* Toggle the CS line. This in effect tells the EEPROM to * execute the previous command. */ e1000_standby_eeprom(hw); /* Read DO repeatedly until it is high (equal to '1'). The * EEPROM will signal that the command has been completed by * raising the DO signal. If DO does not go high in 10 * milliseconds, then error out. */ for (i = 0; i < 200; i++) { eecd = er32(EECD); if (eecd & E1000_EECD_DO) break; udelay(50); } if (i == 200) { e_dbg("EEPROM Write did not complete\n"); return -E1000_ERR_EEPROM; } /* Recover from write */ e1000_standby_eeprom(hw); cond_resched(); words_written++; } /* Send the write disable command to the EEPROM (3-bit opcode plus * 6/8-bit dummy address beginning with 10). It's less work to include * the 10 of the dummy address as part of the opcode than it is to shift * it over the correct number of bits for the address. This takes the * EEPROM out of write/erase mode. */ e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE_MICROWIRE, (u16)(eeprom->opcode_bits + 2)); e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2)); return E1000_SUCCESS; } /** * e1000_read_mac_addr - read the adapters MAC from eeprom * @hw: Struct containing variables accessed by shared code * * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the * second function of dual function devices */ s32 e1000_read_mac_addr(struct e1000_hw *hw) { u16 offset; u16 eeprom_data, i; for (i = 0; i < NODE_ADDRESS_SIZE; i += 2) { offset = i >> 1; if (e1000_read_eeprom(hw, offset, 1, &eeprom_data) < 0) { e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } hw->perm_mac_addr[i] = (u8)(eeprom_data & 0x00FF); hw->perm_mac_addr[i + 1] = (u8)(eeprom_data >> 8); } switch (hw->mac_type) { default: break; case e1000_82546: case e1000_82546_rev_3: if (er32(STATUS) & E1000_STATUS_FUNC_1) hw->perm_mac_addr[5] ^= 0x01; break; } for (i = 0; i < NODE_ADDRESS_SIZE; i++) hw->mac_addr[i] = hw->perm_mac_addr[i]; return E1000_SUCCESS; } /** * e1000_init_rx_addrs - Initializes receive address filters. * @hw: Struct containing variables accessed by shared code * * Places the MAC address in receive address register 0 and clears the rest * of the receive address registers. Clears the multicast table. Assumes * the receiver is in reset when the routine is called. */ static void e1000_init_rx_addrs(struct e1000_hw *hw) { u32 i; u32 rar_num; /* Setup the receive address. */ e_dbg("Programming MAC Address into RAR[0]\n"); e1000_rar_set(hw, hw->mac_addr, 0); rar_num = E1000_RAR_ENTRIES; /* Zero out the following 14 receive addresses. 
RAR[15] is for * manageability */ e_dbg("Clearing RAR[1-14]\n"); for (i = 1; i < rar_num; i++) { E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); E1000_WRITE_FLUSH(); E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); E1000_WRITE_FLUSH(); } } /** * e1000_hash_mc_addr - Hashes an address to determine its location in the multicast table * @hw: Struct containing variables accessed by shared code * @mc_addr: the multicast address to hash */ u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) { u32 hash_value = 0; /* The portion of the address that is used for the hash table is * determined by the mc_filter_type setting. */ switch (hw->mc_filter_type) { /* [0] [1] [2] [3] [4] [5] * 01 AA 00 12 34 56 * LSB MSB */ case 0: /* [47:36] i.e. 0x563 for above example address */ hash_value = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4)); break; case 1: /* [46:35] i.e. 0xAC6 for above example address */ hash_value = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5)); break; case 2: /* [45:34] i.e. 0x5D8 for above example address */ hash_value = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6)); break; case 3: /* [43:32] i.e. 0x634 for above example address */ hash_value = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8)); break; } hash_value &= 0xFFF; return hash_value; } /** * e1000_rar_set - Puts an ethernet address into a receive address register. * @hw: Struct containing variables accessed by shared code * @addr: Address to put into receive address register * @index: Receive address register to write */ void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) | ((u32)addr[2] << 16) | ((u32)addr[3] << 24)); rar_high = ((u32)addr[4] | ((u32)addr[5] << 8)); /* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx * unit hang. * * Description: * If there are any Rx frames queued up or otherwise present in the HW * before RSS is enabled, and then we enable RSS, the HW Rx unit will * hang. To work around this issue, we have to disable receives and * flush out all Rx frames before we enable RSS. To do so, we modify we * redirect all Rx traffic to manageability and then reset the HW. * This flushes away Rx frames, and (since the redirections to * manageability persists across resets) keeps new ones from coming in * while we work. Then, we clear the Address Valid AV bit for all MAC * addresses and undo the re-direction to manageability. * Now, frames are coming in again, but the MAC won't accept them, so * far so good. We now proceed to initialize RSS (if necessary) and * configure the Rx unit. Last, we re-enable the AV bits and continue * on our merry way. */ switch (hw->mac_type) { default: /* Indicate to hardware the Address is Valid. */ rar_high |= E1000_RAH_AV; break; } E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low); E1000_WRITE_FLUSH(); E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high); E1000_WRITE_FLUSH(); } /** * e1000_write_vfta - Writes a value to the specified offset in the VLAN filter table. 
* @hw: Struct containing variables accessed by shared code * @offset: Offset in VLAN filter table to write * @value: Value to write into VLAN filter table */ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { u32 temp; if ((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) { temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1)); E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); E1000_WRITE_FLUSH(); E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp); E1000_WRITE_FLUSH(); } else { E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); E1000_WRITE_FLUSH(); } } /** * e1000_clear_vfta - Clears the VLAN filter table * @hw: Struct containing variables accessed by shared code */ static void e1000_clear_vfta(struct e1000_hw *hw) { u32 offset; for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0); E1000_WRITE_FLUSH(); } } static s32 e1000_id_led_init(struct e1000_hw *hw) { u32 ledctl; const u32 ledctl_mask = 0x000000FF; const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; u16 eeprom_data, i, temp; const u16 led_mask = 0x0F; if (hw->mac_type < e1000_82540) { /* Nothing to do */ return E1000_SUCCESS; } ledctl = er32(LEDCTL); hw->ledctl_default = ledctl; hw->ledctl_mode1 = hw->ledctl_default; hw->ledctl_mode2 = hw->ledctl_default; if (e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, 1, &eeprom_data) < 0) { e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } if ((eeprom_data == ID_LED_RESERVED_0000) || (eeprom_data == ID_LED_RESERVED_FFFF)) { eeprom_data = ID_LED_DEFAULT; } for (i = 0; i < 4; i++) { temp = (eeprom_data >> (i << 2)) & led_mask; switch (temp) { case ID_LED_ON1_DEF2: case ID_LED_ON1_ON2: case ID_LED_ON1_OFF2: hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); hw->ledctl_mode1 |= ledctl_on << (i << 3); break; case ID_LED_OFF1_DEF2: case ID_LED_OFF1_ON2: case ID_LED_OFF1_OFF2: hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); hw->ledctl_mode1 |= ledctl_off << (i << 3); break; default: /* Do nothing */ break; } switch (temp) { case ID_LED_DEF1_ON2: case ID_LED_ON1_ON2: case ID_LED_OFF1_ON2: hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); hw->ledctl_mode2 |= ledctl_on << (i << 3); break; case ID_LED_DEF1_OFF2: case ID_LED_ON1_OFF2: case ID_LED_OFF1_OFF2: hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); hw->ledctl_mode2 |= ledctl_off << (i << 3); break; default: /* Do nothing */ break; } } return E1000_SUCCESS; } /** * e1000_setup_led * @hw: Struct containing variables accessed by shared code * * Prepares SW controlable LED for use and saves the current state of the LED. 
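 *
 * Illustrative usage only (e.g. an "identify" blink), not a prescribed
 * call sequence:
 *
 *	e1000_setup_led(hw);
 *	while (blinking) {
 *		e1000_led_on(hw);
 *		msleep(500);
 *		e1000_led_off(hw);
 *		msleep(500);
 *	}
 *	e1000_cleanup_led(hw);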
*/ s32 e1000_setup_led(struct e1000_hw *hw) { u32 ledctl; s32 ret_val = E1000_SUCCESS; switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: case e1000_82544: /* No setup necessary */ break; case e1000_82541: case e1000_82547: case e1000_82541_rev_2: case e1000_82547_rev_2: /* Turn off PHY Smart Power Down (if enabled) */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &hw->phy_spd_default); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, (u16)(hw->phy_spd_default & ~IGP01E1000_GMII_SPD)); if (ret_val) return ret_val; fallthrough; default: if (hw->media_type == e1000_media_type_fiber) { ledctl = er32(LEDCTL); /* Save current LEDCTL settings */ hw->ledctl_default = ledctl; /* Turn off LED0 */ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK | E1000_LEDCTL_LED0_MODE_MASK); ledctl |= (E1000_LEDCTL_MODE_LED_OFF << E1000_LEDCTL_LED0_MODE_SHIFT); ew32(LEDCTL, ledctl); } else if (hw->media_type == e1000_media_type_copper) ew32(LEDCTL, hw->ledctl_mode1); break; } return E1000_SUCCESS; } /** * e1000_cleanup_led - Restores the saved state of the SW controlable LED. * @hw: Struct containing variables accessed by shared code */ s32 e1000_cleanup_led(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: case e1000_82544: /* No cleanup necessary */ break; case e1000_82541: case e1000_82547: case e1000_82541_rev_2: case e1000_82547_rev_2: /* Turn on PHY Smart Power Down (if previously enabled) */ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, hw->phy_spd_default); if (ret_val) return ret_val; fallthrough; default: /* Restore LEDCTL settings */ ew32(LEDCTL, hw->ledctl_default); break; } return E1000_SUCCESS; } /** * e1000_led_on - Turns on the software controllable LED * @hw: Struct containing variables accessed by shared code */ s32 e1000_led_on(struct e1000_hw *hw) { u32 ctrl = er32(CTRL); switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: /* Set SW Defineable Pin 0 to turn on the LED */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; break; case e1000_82544: if (hw->media_type == e1000_media_type_fiber) { /* Set SW Defineable Pin 0 to turn on the LED */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else { /* Clear SW Defineable Pin 0 to turn on the LED */ ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } break; default: if (hw->media_type == e1000_media_type_fiber) { /* Clear SW Defineable Pin 0 to turn on the LED */ ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else if (hw->media_type == e1000_media_type_copper) { ew32(LEDCTL, hw->ledctl_mode2); return E1000_SUCCESS; } break; } ew32(CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_led_off - Turns off the software controllable LED * @hw: Struct containing variables accessed by shared code */ s32 e1000_led_off(struct e1000_hw *hw) { u32 ctrl = er32(CTRL); switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: /* Clear SW Defineable Pin 0 to turn off the LED */ ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; break; case e1000_82544: if (hw->media_type == e1000_media_type_fiber) { /* Clear SW Defineable Pin 0 to turn off the LED */ ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else { /* Set SW Defineable Pin 0 to turn off the LED */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } break; default: if (hw->media_type == 
e1000_media_type_fiber) { /* Set SW Defineable Pin 0 to turn off the LED */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else if (hw->media_type == e1000_media_type_copper) { ew32(LEDCTL, hw->ledctl_mode1); return E1000_SUCCESS; } break; } ew32(CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_clear_hw_cntrs - Clears all hardware statistics counters. * @hw: Struct containing variables accessed by shared code */ static void e1000_clear_hw_cntrs(struct e1000_hw *hw) { er32(CRCERRS); er32(SYMERRS); er32(MPC); er32(SCC); er32(ECOL); er32(MCC); er32(LATECOL); er32(COLC); er32(DC); er32(SEC); er32(RLEC); er32(XONRXC); er32(XONTXC); er32(XOFFRXC); er32(XOFFTXC); er32(FCRUC); er32(PRC64); er32(PRC127); er32(PRC255); er32(PRC511); er32(PRC1023); er32(PRC1522); er32(GPRC); er32(BPRC); er32(MPRC); er32(GPTC); er32(GORCL); er32(GORCH); er32(GOTCL); er32(GOTCH); er32(RNBC); er32(RUC); er32(RFC); er32(ROC); er32(RJC); er32(TORL); er32(TORH); er32(TOTL); er32(TOTH); er32(TPR); er32(TPT); er32(PTC64); er32(PTC127); er32(PTC255); er32(PTC511); er32(PTC1023); er32(PTC1522); er32(MPTC); er32(BPTC); if (hw->mac_type < e1000_82543) return; er32(ALGNERRC); er32(RXERRC); er32(TNCRS); er32(CEXTERR); er32(TSCTC); er32(TSCTFC); if (hw->mac_type <= e1000_82544) return; er32(MGTPRC); er32(MGTPDC); er32(MGTPTC); } /** * e1000_reset_adaptive - Resets Adaptive IFS to its default state. * @hw: Struct containing variables accessed by shared code * * Call this after e1000_init_hw. You may override the IFS defaults by setting * hw->ifs_params_forced to true. However, you must initialize hw-> * current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and ifs_ratio * before calling this function. */ void e1000_reset_adaptive(struct e1000_hw *hw) { if (hw->adaptive_ifs) { if (!hw->ifs_params_forced) { hw->current_ifs_val = 0; hw->ifs_min_val = IFS_MIN; hw->ifs_max_val = IFS_MAX; hw->ifs_step_size = IFS_STEP; hw->ifs_ratio = IFS_RATIO; } hw->in_ifs_mode = false; ew32(AIT, 0); } else { e_dbg("Not in Adaptive IFS mode!\n"); } } /** * e1000_update_adaptive - update adaptive IFS * @hw: Struct containing variables accessed by shared code * * Called during the callback/watchdog routine to update IFS value based on * the ratio of transmits to collisions. */ void e1000_update_adaptive(struct e1000_hw *hw) { if (hw->adaptive_ifs) { if ((hw->collision_delta * hw->ifs_ratio) > hw->tx_packet_delta) { if (hw->tx_packet_delta > MIN_NUM_XMITS) { hw->in_ifs_mode = true; if (hw->current_ifs_val < hw->ifs_max_val) { if (hw->current_ifs_val == 0) hw->current_ifs_val = hw->ifs_min_val; else hw->current_ifs_val += hw->ifs_step_size; ew32(AIT, hw->current_ifs_val); } } } else { if (hw->in_ifs_mode && (hw->tx_packet_delta <= MIN_NUM_XMITS)) { hw->current_ifs_val = 0; hw->in_ifs_mode = false; ew32(AIT, 0); } } } else { e_dbg("Not in Adaptive IFS mode!\n"); } } /** * e1000_get_bus_info * @hw: Struct containing variables accessed by shared code * * Gets the current PCI bus type, speed, and width of the hardware */ void e1000_get_bus_info(struct e1000_hw *hw) { u32 status; switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: hw->bus_type = e1000_bus_type_pci; hw->bus_speed = e1000_bus_speed_unknown; hw->bus_width = e1000_bus_width_unknown; break; default: status = er32(STATUS); hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ? e1000_bus_type_pcix : e1000_bus_type_pci; if (hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) { hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ? 
e1000_bus_speed_66 : e1000_bus_speed_120; } else if (hw->bus_type == e1000_bus_type_pci) { hw->bus_speed = (status & E1000_STATUS_PCI66) ? e1000_bus_speed_66 : e1000_bus_speed_33; } else { switch (status & E1000_STATUS_PCIX_SPEED) { case E1000_STATUS_PCIX_SPEED_66: hw->bus_speed = e1000_bus_speed_66; break; case E1000_STATUS_PCIX_SPEED_100: hw->bus_speed = e1000_bus_speed_100; break; case E1000_STATUS_PCIX_SPEED_133: hw->bus_speed = e1000_bus_speed_133; break; default: hw->bus_speed = e1000_bus_speed_reserved; break; } } hw->bus_width = (status & E1000_STATUS_BUS64) ? e1000_bus_width_64 : e1000_bus_width_32; break; } } /** * e1000_write_reg_io * @hw: Struct containing variables accessed by shared code * @offset: offset to write to * @value: value to write * * Writes a value to one of the devices registers using port I/O (as opposed to * memory mapped I/O). Only 82544 and newer devices support port I/O. */ static void e1000_write_reg_io(struct e1000_hw *hw, u32 offset, u32 value) { unsigned long io_addr = hw->io_base; unsigned long io_data = hw->io_base + 4; e1000_io_write(hw, io_addr, offset); e1000_io_write(hw, io_data, value); } /** * e1000_get_cable_length - Estimates the cable length. * @hw: Struct containing variables accessed by shared code * @min_length: The estimated minimum length * @max_length: The estimated maximum length * * returns: - E1000_ERR_XXX * E1000_SUCCESS * * This function always returns a ranged length (minimum & maximum). * So for M88 phy's, this function interprets the one value returned from the * register to the minimum and maximum range. * For IGP phy's, the function calculates the range by the AGC registers. */ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, u16 *max_length) { s32 ret_val; u16 agc_value = 0; u16 i, phy_data; u16 cable_length; *min_length = *max_length = 0; /* Use old method for Phy older than IGP */ if (hw->phy_type == e1000_phy_m88) { ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; cable_length = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); /* Convert the enum value to ranged values */ switch (cable_length) { case e1000_cable_length_50: *min_length = 0; *max_length = e1000_igp_cable_length_50; break; case e1000_cable_length_50_80: *min_length = e1000_igp_cable_length_50; *max_length = e1000_igp_cable_length_80; break; case e1000_cable_length_80_110: *min_length = e1000_igp_cable_length_80; *max_length = e1000_igp_cable_length_110; break; case e1000_cable_length_110_140: *min_length = e1000_igp_cable_length_110; *max_length = e1000_igp_cable_length_140; break; case e1000_cable_length_140: *min_length = e1000_igp_cable_length_140; *max_length = e1000_igp_cable_length_170; break; default: return -E1000_ERR_PHY; } } else if (hw->phy_type == e1000_phy_igp) { /* For IGP PHY */ u16 cur_agc_value; u16 min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE; static const u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = { IGP01E1000_PHY_AGC_A, IGP01E1000_PHY_AGC_B, IGP01E1000_PHY_AGC_C, IGP01E1000_PHY_AGC_D }; /* Read the AGC registers for all channels */ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); if (ret_val) return ret_val; cur_agc_value = phy_data >> IGP01E1000_AGC_LENGTH_SHIFT; /* Value bound check. */ if ((cur_agc_value >= IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || (cur_agc_value == 0)) return -E1000_ERR_PHY; agc_value += cur_agc_value; /* Update minimal AGC value. 
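			 *
			 * Worked example (illustrative numbers only): if the
			 * four channels read back AGC values of 30, 32, 33
			 * and 5, the sum (100) falls under the "shorter than
			 * 50 m" test below, so the low outlier (5) is dropped
			 * and the remaining three are averaged (95 / 3 = 31)
			 * before indexing the length table; for longer cables
			 * all four channels are averaged.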
*/ if (min_agc_value > cur_agc_value) min_agc_value = cur_agc_value; } /* Remove the minimal AGC result for length < 50m */ if (agc_value < IGP01E1000_PHY_CHANNEL_NUM * e1000_igp_cable_length_50) { agc_value -= min_agc_value; /* Get the average length of the remaining 3 channels */ agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1); } else { /* Get the average length of all the 4 channels. */ agc_value /= IGP01E1000_PHY_CHANNEL_NUM; } /* Set the range of the calculated length. */ *min_length = ((e1000_igp_cable_length_table[agc_value] - IGP01E1000_AGC_RANGE) > 0) ? (e1000_igp_cable_length_table[agc_value] - IGP01E1000_AGC_RANGE) : 0; *max_length = e1000_igp_cable_length_table[agc_value] + IGP01E1000_AGC_RANGE; } return E1000_SUCCESS; } /** * e1000_check_polarity - Check the cable polarity * @hw: Struct containing variables accessed by shared code * @polarity: output parameter : 0 - Polarity is not reversed * 1 - Polarity is reversed. * * returns: - E1000_ERR_XXX * E1000_SUCCESS * * For phy's older than IGP, this function simply reads the polarity bit in the * Phy Status register. For IGP phy's, this bit is valid only if link speed is * 10 Mbps. If the link speed is 100 Mbps there is no polarity so this bit will * return 0. If the link speed is 1000 Mbps the polarity status is in the * IGP01E1000_PHY_PCS_INIT_REG. */ static s32 e1000_check_polarity(struct e1000_hw *hw, e1000_rev_polarity *polarity) { s32 ret_val; u16 phy_data; if (hw->phy_type == e1000_phy_m88) { /* return the Polarity bit in the Status register. */ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; *polarity = FIELD_GET(M88E1000_PSSR_REV_POLARITY, phy_data) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal; } else if (hw->phy_type == e1000_phy_igp) { /* Read the Status register to check the speed */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &phy_data); if (ret_val) return ret_val; /* If speed is 1000 Mbps, must read the * IGP01E1000_PHY_PCS_INIT_REG to find the polarity status */ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == IGP01E1000_PSSR_SPEED_1000MBPS) { /* Read the GIG initialization PCS register (0x00B4) */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG, &phy_data); if (ret_val) return ret_val; /* Check the polarity bits */ *polarity = (phy_data & IGP01E1000_PHY_POLARITY_MASK) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal; } else { /* For 10 Mbps, read the polarity bit in the status * register. (for 100 Mbps this bit is always 0) */ *polarity = (phy_data & IGP01E1000_PSSR_POLARITY_REVERSED) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal; } } return E1000_SUCCESS; } /** * e1000_check_downshift - Check if Downshift occurred * @hw: Struct containing variables accessed by shared code * * returns: - E1000_ERR_XXX * E1000_SUCCESS * * For phy's older than IGP, this function reads the Downshift bit in the Phy * Specific Status register. For IGP phy's, it reads the Downgrade bit in the * Link Health register. In IGP this bit is latched high, so the driver must * read it immediately after link is established. */ static s32 e1000_check_downshift(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; if (hw->phy_type == e1000_phy_igp) { ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_LINK_HEALTH, &phy_data); if (ret_val) return ret_val; hw->speed_downgraded = (phy_data & IGP01E1000_PLHR_SS_DOWNGRADE) ? 
1 : 0; } else if (hw->phy_type == e1000_phy_m88) { ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; hw->speed_downgraded = FIELD_GET(M88E1000_PSSR_DOWNSHIFT, phy_data); } return E1000_SUCCESS; } static const u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = { IGP01E1000_PHY_AGC_PARAM_A, IGP01E1000_PHY_AGC_PARAM_B, IGP01E1000_PHY_AGC_PARAM_C, IGP01E1000_PHY_AGC_PARAM_D }; static s32 e1000_1000Mb_check_cable_length(struct e1000_hw *hw) { u16 min_length, max_length; u16 phy_data, i; s32 ret_val; ret_val = e1000_get_cable_length(hw, &min_length, &max_length); if (ret_val) return ret_val; if (hw->dsp_config_state != e1000_dsp_config_enabled) return 0; if (min_length >= e1000_igp_cable_length_50) { for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i], &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ret_val = e1000_write_phy_reg(hw, dsp_reg_array[i], phy_data); if (ret_val) return ret_val; } hw->dsp_config_state = e1000_dsp_config_activated; } else { u16 ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20; u32 idle_errs = 0; /* clear previous idle error counts */ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) return ret_val; for (i = 0; i < ffe_idle_err_timeout; i++) { udelay(1000); ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) return ret_val; idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT); if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) { hw->ffe_config_state = e1000_ffe_config_active; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_DSP_FFE, IGP01E1000_PHY_DSP_FFE_CM_CP); if (ret_val) return ret_val; break; } if (idle_errs) ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_100; } } return 0; } /** * e1000_config_dsp_after_link_change * @hw: Struct containing variables accessed by shared code * @link_up: was link up at the time this was called * * returns: - E1000_ERR_PHY if fail to read/write the PHY * E1000_SUCCESS at any other case. * * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a * gigabit link is achieved to improve link quality. */ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) { s32 ret_val; u16 phy_data, phy_saved_data, speed, duplex, i; if (hw->phy_type != e1000_phy_igp) return E1000_SUCCESS; if (link_up) { ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); if (ret_val) { e_dbg("Error getting link speed and duplex\n"); return ret_val; } if (speed == SPEED_1000) { ret_val = e1000_1000Mb_check_cable_length(hw); if (ret_val) return ret_val; } } else { if (hw->dsp_config_state == e1000_dsp_config_activated) { /* Save off the current value of register 0x2F5B to be * restored at the end of the routines. 
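			 * (In outline, the recovery sequence below is: save
			 * register 0x2F5B, disable the PHY transmitter, force
			 * gigabit operation, clear the EDAC mu-index and set
			 * the 9-bit sign-extension bit in each channel's AGC
			 * parameter register, restart autonegotiation, wait,
			 * then restore the saved transmitter setting.)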
*/ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); if (ret_val) return ret_val; /* Disable the PHY transmitter */ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); if (ret_val) return ret_val; msleep(20); ret_val = e1000_write_phy_reg(hw, 0x0000, IGP01E1000_IEEE_FORCE_GIGA); if (ret_val) return ret_val; for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i], &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS; ret_val = e1000_write_phy_reg(hw, dsp_reg_array[i], phy_data); if (ret_val) return ret_val; } ret_val = e1000_write_phy_reg(hw, 0x0000, IGP01E1000_IEEE_RESTART_AUTONEG); if (ret_val) return ret_val; msleep(20); /* Now enable the transmitter */ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); if (ret_val) return ret_val; hw->dsp_config_state = e1000_dsp_config_enabled; } if (hw->ffe_config_state == e1000_ffe_config_active) { /* Save off the current value of register 0x2F5B to be * restored at the end of the routines. */ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); if (ret_val) return ret_val; /* Disable the PHY transmitter */ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); if (ret_val) return ret_val; msleep(20); ret_val = e1000_write_phy_reg(hw, 0x0000, IGP01E1000_IEEE_FORCE_GIGA); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_DSP_FFE, IGP01E1000_PHY_DSP_FFE_DEFAULT); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, 0x0000, IGP01E1000_IEEE_RESTART_AUTONEG); if (ret_val) return ret_val; msleep(20); /* Now enable the transmitter */ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); if (ret_val) return ret_val; hw->ffe_config_state = e1000_ffe_config_enabled; } } return E1000_SUCCESS; } /** * e1000_set_phy_mode - Set PHY to class A mode * @hw: Struct containing variables accessed by shared code * * Assumes the following operations will follow to enable the new class mode. * 1. Do a PHY soft reset * 2. Restart auto-negotiation or force link. */ static s32 e1000_set_phy_mode(struct e1000_hw *hw) { s32 ret_val; u16 eeprom_data; if ((hw->mac_type == e1000_82545_rev_3) && (hw->media_type == e1000_media_type_copper)) { ret_val = e1000_read_eeprom(hw, EEPROM_PHY_CLASS_WORD, 1, &eeprom_data); if (ret_val) return ret_val; if ((eeprom_data != EEPROM_RESERVED_WORD) && (eeprom_data & EEPROM_PHY_CLASS_A)) { ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x000B); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x8104); if (ret_val) return ret_val; hw->phy_reset_disable = false; } } return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state - set d3 link power state * @hw: Struct containing variables accessed by shared code * @active: true to enable lplu false to disable lplu. * * This function sets the lplu state according to the active flag. When * activating lplu this function also disables smart speed and vise versa. * lplu will not be activated unless the device autonegotiation advertisement * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes. * * returns: - E1000_ERR_PHY if fail to read/write the PHY * E1000_SUCCESS at any other case. */ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) { s32 ret_val; u16 phy_data; if (hw->phy_type != e1000_phy_igp) return E1000_SUCCESS; /* During driver activity LPLU should not be used or it will attain link * from the lowest speeds starting from 10Mbps. 
The capability is used * for Dx transitions and states */ if (hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) { ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data); if (ret_val) return ret_val; } if (!active) { if (hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) { phy_data &= ~IGP01E1000_GMII_FLEX_SPD; ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data); if (ret_val) return ret_val; } /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (hw->smart_speed == e1000_smart_speed_on) { ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); if (ret_val) return ret_val; phy_data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); if (ret_val) return ret_val; } else if (hw->smart_speed == e1000_smart_speed_off) { ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); if (ret_val) return ret_val; } } else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) || (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL) || (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) { if (hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) { phy_data |= IGP01E1000_GMII_FLEX_SPD; ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data); if (ret_val) return ret_val; } /* When LPLU is enabled we should disable SmartSpeed */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_set_vco_speed * @hw: Struct containing variables accessed by shared code * * Change VCO speed register to improve Bit Error Rate performance of SERDES. 
*/ static s32 e1000_set_vco_speed(struct e1000_hw *hw) { s32 ret_val; u16 default_page = 0; u16 phy_data; switch (hw->mac_type) { case e1000_82545_rev_3: case e1000_82546_rev_3: break; default: return E1000_SUCCESS; } /* Set PHY register 30, page 5, bit 8 to 0 */ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, &default_page); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); if (ret_val) return ret_val; phy_data &= ~M88E1000_PHY_VCO_REG_BIT8; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); if (ret_val) return ret_val; /* Set PHY register 30, page 4, bit 11 to 1 */ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_PHY_VCO_REG_BIT11; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, default_page); if (ret_val) return ret_val; return E1000_SUCCESS; } /** * e1000_enable_mng_pass_thru - check for bmc pass through * @hw: Struct containing variables accessed by shared code * * Verifies the hardware needs to allow ARPs to be processed by the host * returns: - true/false */ u32 e1000_enable_mng_pass_thru(struct e1000_hw *hw) { u32 manc; if (hw->asf_firmware_present) { manc = er32(MANC); if (!(manc & E1000_MANC_RCV_TCO_EN) || !(manc & E1000_MANC_EN_MAC_ADDR_FILTER)) return false; if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN)) return true; } return false; } static s32 e1000_polarity_reversal_workaround(struct e1000_hw *hw) { s32 ret_val; u16 mii_status_reg; u16 i; /* Polarity reversal workaround for forced 10F/10H links. */ /* Disable the transmitter on the PHY */ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); if (ret_val) return ret_val; /* This loop will early-out if the NO link condition has been met. */ for (i = PHY_FORCE_TIME; i > 0; i--) { /* Read the MII Status Register and wait for Link Status bit * to be clear. */ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) break; msleep(100); } /* Recommended delay time after link has been lost */ msleep(1000); /* Now we will re-enable th transmitter on the PHY */ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) return ret_val; msleep(50); ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); if (ret_val) return ret_val; msleep(50); ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); if (ret_val) return ret_val; msleep(50); ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); if (ret_val) return ret_val; /* This loop will early-out if the link condition has been met. */ for (i = PHY_FORCE_TIME; i > 0; i--) { /* Read the MII Status Register and wait for Link Status bit * to be set. 
*/ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; if (mii_status_reg & MII_SR_LINK_STATUS) break; msleep(100); } return E1000_SUCCESS; } /** * e1000_get_auto_rd_done * @hw: Struct containing variables accessed by shared code * * Check for EEPROM Auto Read bit done. * returns: - E1000_ERR_RESET if fail to reset MAC * E1000_SUCCESS at any other case. */ static s32 e1000_get_auto_rd_done(struct e1000_hw *hw) { msleep(5); return E1000_SUCCESS; } /** * e1000_get_phy_cfg_done * @hw: Struct containing variables accessed by shared code * * Checks if the PHY configuration is done * returns: - E1000_ERR_RESET if fail to reset MAC * E1000_SUCCESS at any other case. */ static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw) { msleep(10); return E1000_SUCCESS; }
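/*
 * Editor's note (illustrative sketch, not part of the original driver): the
 * back-to-back PHY_STATUS reads in the polarity-reversal workaround above are
 * deliberate.  The MII link-status bit latches a link failure, so the first
 * read returns the latched value and only the second read reflects the
 * current link state.  A minimal sketch of the pattern, reusing the helpers
 * defined in this file (the helper name below is hypothetical):
 *
 *	static s32 e1000_read_current_link(struct e1000_hw *hw, bool *link_up)
 *	{
 *		u16 status;
 *		s32 ret_val;
 *
 *		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &status);
 *		if (ret_val)
 *			return ret_val;
 *		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &status);
 *		if (ret_val)
 *			return ret_val;
 *		*link_up = !!(status & MII_SR_LINK_STATUS);
 *		return E1000_SUCCESS;
 *	}
 */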
// SPDX-License-Identifier: GPL-2.0-only /* * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/ip.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_conntrack.h> #include <linux/netfilter/xt_cluster.h> static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) { return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; } static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) { return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; } static inline u_int32_t xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info) { return jhash_1word(ip, info->hash_seed); } static inline u_int32_t xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info) { return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed); } static inline u_int32_t xt_cluster_hash(const struct nf_conn *ct, const struct xt_cluster_match_info *info) { u_int32_t hash = 0; switch(nf_ct_l3num(ct)) { case AF_INET: hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info); break; case AF_INET6: hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info); break; default: WARN_ON(1); break; } return reciprocal_scale(hash, info->total_nodes); } static inline bool xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) { bool is_multicast = false; switch(family) { case NFPROTO_IPV4: is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); break; case NFPROTO_IPV6: is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr); break; default: WARN_ON(1); break; } return is_multicast; } static bool xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct sk_buff *pskb = (struct sk_buff *)skb; const struct xt_cluster_match_info *info = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned long hash; /* This match assumes that all nodes see the same packets. This can be * achieved if the switch that connects the cluster nodes supports some * sort of 'port mirroring'. However, if your switch does not support * this, your cluster nodes can reply to ARP requests using a multicast MAC * address. Thus, your switch will flood the same packets to the * cluster nodes with the same multicast MAC address. Using a multicast * link address is a RFC 1812 (section 3.3.2) violation, but this works * fine in practice. * * Unfortunately, if you use the multicast MAC address, the link layer * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted * by TCP and others for packets coming to this node. For that reason, * this match mangles skbuff's pkt_type if it detects a packet * addressed to a unicast address but using PACKET_MULTICAST.
Yes, I * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. */ if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) return false; if (ct->master) hash = xt_cluster_hash(ct->master, info); else hash = xt_cluster_hash(ct, info); return !!((1 << hash) & info->node_mask) ^ !!(info->flags & XT_CLUSTER_F_INV); } static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; int ret; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", info->total_nodes, XT_CLUSTER_NODES_MAX); return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match xt_cluster_match __read_mostly = { .name = "cluster", .family = NFPROTO_UNSPEC, .match = xt_cluster_mt, .checkentry = xt_cluster_mt_checkentry, .matchsize = sizeof(struct xt_cluster_match_info), .destroy = xt_cluster_mt_destroy, .me = THIS_MODULE, }; static int __init xt_cluster_mt_init(void) { return xt_register_match(&xt_cluster_match); } static void __exit xt_cluster_mt_fini(void) { xt_unregister_match(&xt_cluster_match); } MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: hash-based cluster match"); MODULE_ALIAS("ipt_cluster"); MODULE_ALIAS("ip6t_cluster"); module_init(xt_cluster_mt_init); module_exit(xt_cluster_mt_fini);
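/*
 * Editor's note (illustrative sketch, not part of the module): the
 * accept/reject decision in xt_cluster_mt() above boils down to hashing the
 * original source address into a node id in [0, total_nodes) and testing
 * that id against the configured node mask.  Roughly, for IPv4:
 *
 *	u32 hash = jhash_1word(saddr, info->hash_seed);
 *	u32 node = reciprocal_scale(hash, info->total_nodes);
 *	bool mine = !!((1 << node) & info->node_mask);
 *
 *	return mine ^ !!(info->flags & XT_CLUSTER_F_INV);
 *
 * Every node computes the same hash for a given flow, so exactly those nodes
 * whose bit is set in node_mask will accept it.
 */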
#ifndef __DRM_VMA_MANAGER_H__ #define __DRM_VMA_MANAGER_H__ /* * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <drm/drm_mm.h> #include <linux/mm.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/types.h> /* We make up offsets for buffer objects so we can recognize them at * mmap time.
pgoff in mmap is an unsigned long, so we need to make sure * that the faked up offset will fit */ #if BITS_PER_LONG == 64 #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 256) #else #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16) #endif struct drm_file; struct drm_vma_offset_file { struct rb_node vm_rb; struct drm_file *vm_tag; unsigned long vm_count; }; struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; struct rb_root vm_files; void *driver_private; }; struct drm_vma_offset_manager { rwlock_t vm_lock; struct drm_mm vm_addr_space_mm; }; void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr, unsigned long page_offset, unsigned long size); void drm_vma_offset_manager_destroy(struct drm_vma_offset_manager *mgr); struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_manager *mgr, unsigned long start, unsigned long pages); int drm_vma_offset_add(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node, unsigned long pages); void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node); int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag); int drm_vma_node_allow_once(struct drm_vma_offset_node *node, struct drm_file *tag); void drm_vma_node_revoke(struct drm_vma_offset_node *node, struct drm_file *tag); bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node, struct drm_file *tag); /** * drm_vma_offset_exact_lookup_locked() - Look up node by exact address * @mgr: Manager object * @start: Start address (page-based, not byte-based) * @pages: Size of object (page-based) * * Same as drm_vma_offset_lookup_locked() but does not allow any offset into the node. * It only returns the exact object with the given start address. * * RETURNS: * Node at exact start address @start. */ static inline struct drm_vma_offset_node * drm_vma_offset_exact_lookup_locked(struct drm_vma_offset_manager *mgr, unsigned long start, unsigned long pages) { struct drm_vma_offset_node *node; node = drm_vma_offset_lookup_locked(mgr, start, pages); return (node && node->vm_node.start == start) ? node : NULL; } /** * drm_vma_offset_lock_lookup() - Lock lookup for extended private use * @mgr: Manager object * * Lock VMA manager for extended lookups. Only locked VMA function calls * are allowed while holding this lock. All other contexts are blocked from VMA * until the lock is released via drm_vma_offset_unlock_lookup(). * * Use this if you need to take a reference to the objects returned by * drm_vma_offset_lookup_locked() before releasing this lock again. * * This lock must not be used for anything else than extended lookups. You must * not call any other VMA helpers while holding this lock. * * Note: You're in atomic-context while holding this lock! */ static inline void drm_vma_offset_lock_lookup(struct drm_vma_offset_manager *mgr) { read_lock(&mgr->vm_lock); } /** * drm_vma_offset_unlock_lookup() - Unlock lookup for extended private use * @mgr: Manager object * * Release lookup-lock. See drm_vma_offset_lock_lookup() for more information. */ static inline void drm_vma_offset_unlock_lookup(struct drm_vma_offset_manager *mgr) { read_unlock(&mgr->vm_lock); } /** * drm_vma_node_reset() - Initialize or reset node object * @node: Node to initialize or reset * * Reset a node to its initial state. 
This must be called before using it with * any VMA offset manager. * * This must not be called on an already allocated node, or you will leak * memory. */ static inline void drm_vma_node_reset(struct drm_vma_offset_node *node) { memset(node, 0, sizeof(*node)); node->vm_files = RB_ROOT; rwlock_init(&node->vm_lock); } /** * drm_vma_node_start() - Return start address for page-based addressing * @node: Node to inspect * * Return the start address of the given node. This can be used as offset into * the linear VM space that is provided by the VMA offset manager. Note that * this can only be used for page-based addressing. If you need a proper offset * for user-space mappings, you must apply "<< PAGE_SHIFT" or use the * drm_vma_node_offset_addr() helper instead. * * RETURNS: * Start address of @node for page-based addressing. 0 if the node does not * have an offset allocated. */ static inline unsigned long drm_vma_node_start(const struct drm_vma_offset_node *node) { return node->vm_node.start; } /** * drm_vma_node_size() - Return size (page-based) * @node: Node to inspect * * Return the size as number of pages for the given node. This is the same size * that was passed to drm_vma_offset_add(). If no offset is allocated for the * node, this is 0. * * RETURNS: * Size of @node as number of pages. 0 if the node does not have an offset * allocated. */ static inline unsigned long drm_vma_node_size(struct drm_vma_offset_node *node) { return node->vm_node.size; } /** * drm_vma_node_offset_addr() - Return sanitized offset for user-space mmaps * @node: Linked offset node * * Same as drm_vma_node_start() but returns the address as a valid offset that * can be used for user-space mappings during mmap(). * This must not be called on unlinked nodes. * * RETURNS: * Offset of @node for byte-based addressing. 0 if the node does not have an * object allocated. */ static inline __u64 drm_vma_node_offset_addr(struct drm_vma_offset_node *node) { return ((__u64)node->vm_node.start) << PAGE_SHIFT; } /** * drm_vma_node_unmap() - Unmap offset node * @node: Offset node * @file_mapping: Address space to unmap @node from * * Unmap all userspace mappings for a given offset node. The mappings must be * associated with the @file_mapping address-space. If no offset exists * nothing is done. * * This call is unlocked. The caller must guarantee that drm_vma_offset_remove() * is not called on this node concurrently. */ static inline void drm_vma_node_unmap(struct drm_vma_offset_node *node, struct address_space *file_mapping) { if (drm_mm_node_allocated(&node->vm_node)) unmap_mapping_range(file_mapping, drm_vma_node_offset_addr(node), drm_vma_node_size(node) << PAGE_SHIFT, 1); } /** * drm_vma_node_verify_access() - Access verification helper for TTM * @node: Offset node * @tag: Tag of file to check * * This checks whether @tag is granted access to @node. It is the same as * drm_vma_node_is_allowed() but suitable as drop-in helper for TTM * verify_access() callbacks. * * RETURNS: * 0 if access is granted, -EACCES otherwise. */ static inline int drm_vma_node_verify_access(struct drm_vma_offset_node *node, struct drm_file *tag) { return drm_vma_node_is_allowed(node, tag) ? 0 : -EACCES; } #endif /* __DRM_VMA_MANAGER_H__ */
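/*
 * Editor's note (usage sketch, assuming a hypothetical driver object
 * "struct my_obj" that embeds a drm_vma_offset_node as "vma_node" and is
 * reference counted via a "kref" member): the extended-lookup helpers above
 * are meant to be used so that a reference can be taken before the lookup
 * lock is dropped, e.g.:
 *
 *	struct drm_vma_offset_node *node;
 *	struct my_obj *obj = NULL;
 *
 *	drm_vma_offset_lock_lookup(mgr);
 *	node = drm_vma_offset_lookup_locked(mgr, start, pages);
 *	if (node &&
 *	    kref_get_unless_zero(&container_of(node, struct my_obj,
 *					       vma_node)->kref))
 *		obj = container_of(node, struct my_obj, vma_node);
 *	drm_vma_offset_unlock_lookup(mgr);
 *
 * No other VMA helpers may be called between the lock/unlock pair, and the
 * section runs in atomic context because it holds a read_lock().
 */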
// SPDX-License-Identifier: GPL-2.0-only /* * handle transition of Linux booting another kernel * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> */ #define pr_fmt(fmt) "kexec: " fmt #include <linux/mm.h> #include <linux/kexec.h> #include <linux/string.h> #include <linux/gfp.h> #include <linux/reboot.h> #include <linux/numa.h> #include <linux/ftrace.h> #include <linux/io.h> #include <linux/suspend.h> #include <linux/vmalloc.h> #include <linux/efi.h> #include <linux/cc_platform.h> #include <asm/init.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/io_apic.h> #include <asm/debugreg.h> #include <asm/kexec-bzimage64.h> #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/cpu.h> #ifdef CONFIG_ACPI /* * Used while adding mapping for ACPI tables.
* Can be reused when other iomem regions need be mapped */ struct init_pgtable_data { struct x86_mapping_info *info; pgd_t *level4p; }; static int mem_region_callback(struct resource *res, void *arg) { struct init_pgtable_data *data = arg; return kernel_ident_mapping_init(data->info, data->level4p, res->start, res->end + 1); } static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { struct init_pgtable_data data; unsigned long flags; int ret; data.info = info; data.level4p = level4p; flags = IORESOURCE_MEM | IORESOURCE_BUSY; ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &data, mem_region_callback); if (ret && ret != -EINVAL) return ret; /* ACPI tables could be located in ACPI Non-volatile Storage region */ ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &data, mem_region_callback); if (ret && ret != -EINVAL) return ret; return 0; } #else static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; } #endif #ifdef CONFIG_KEXEC_FILE const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_bzImage64_ops, NULL }; #endif static int map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p) { #ifdef CONFIG_EFI unsigned long mstart, mend; if (!efi_enabled(EFI_BOOT)) return 0; mstart = (boot_params.efi_info.efi_systab | ((u64)boot_params.efi_info.efi_systab_hi<<32)); if (efi_enabled(EFI_64BIT)) mend = mstart + sizeof(efi_system_table_64_t); else mend = mstart + sizeof(efi_system_table_32_t); if (!mstart) return 0; return kernel_ident_mapping_init(info, level4p, mstart, mend); #endif return 0; } static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); image->arch.p4d = NULL; free_page((unsigned long)image->arch.pud); image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) { pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; unsigned long vaddr, paddr; int result = -ENOMEM; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; vaddr = (unsigned long)relocate_kernel; paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); pgd += pgd_index(vaddr); if (!pgd_present(*pgd)) { p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); if (!p4d) goto err; image->arch.p4d = p4d; set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); } p4d = p4d_offset(pgd, vaddr); if (!p4d_present(*p4d)) { pud = (pud_t *)get_zeroed_page(GFP_KERNEL); if (!pud) goto err; image->arch.pud = pud; set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); } pud = pud_offset(p4d, vaddr); if (!pud_present(*pud)) { pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd) goto err; image->arch.pmd = pmd; set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); } pmd = pmd_offset(pud, vaddr); if (!pmd_present(*pmd)) { pte = (pte_t *)get_zeroed_page(GFP_KERNEL); if (!pte) goto err; image->arch.pte = pte; set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); } pte = pte_offset_kernel(pmd, vaddr); if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) prot = PAGE_KERNEL_EXEC; set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); return 0; err: return result; } static void *alloc_pgt_page(void *data) { struct kimage *image = (struct kimage *)data; struct page *page; void *p = NULL; page = kimage_alloc_control_pages(image, 0); if (page) { p = page_address(page); clear_page(p); } return p; } static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { struct x86_mapping_info info = { 
.alloc_pgt_page = alloc_pgt_page, .context = image, .page_flag = __PAGE_KERNEL_LARGE_EXEC, .kernpg_flag = _KERNPG_TABLE_NOENC, }; unsigned long mstart, mend; pgd_t *level4p; int result; int i; level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { info.page_flag |= _PAGE_ENC; info.kernpg_flag |= _PAGE_ENC; } if (direct_gbpages) info.direct_gbpages = true; for (i = 0; i < nr_pfn_mapped; i++) { mstart = pfn_mapped[i].start << PAGE_SHIFT; mend = pfn_mapped[i].end << PAGE_SHIFT; result = kernel_ident_mapping_init(&info, level4p, mstart, mend); if (result) return result; } /* * segments's mem ranges could be outside 0 ~ max_pfn, * for example when jump back to original kernel from kexeced kernel. * or first kernel is booted with user mem map, and second kernel * could be loaded out of that range. */ for (i = 0; i < image->nr_segments; i++) { mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; result = kernel_ident_mapping_init(&info, level4p, mstart, mend); if (result) return result; } /* * Prepare EFI systab and ACPI tables for kexec kernel since they are * not covered by pfn_mapped. */ result = map_efi_systab(&info, level4p); if (result) return result; result = map_acpi_tables(&info, level4p); if (result) return result; return init_transition_pgtable(image, level4p); } static void load_segments(void) { __asm__ __volatile__ ( "\tmovl %0,%%ds\n" "\tmovl %0,%%es\n" "\tmovl %0,%%ss\n" "\tmovl %0,%%fs\n" "\tmovl %0,%%gs\n" : : "a" (__KERNEL_DS) : "memory" ); } int machine_kexec_prepare(struct kimage *image) { unsigned long start_pgtable; int result; /* Calculate the offsets */ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, start_pgtable); if (result) return result; return 0; } void machine_kexec_cleanup(struct kimage *image) { free_transition_pgtable(image); } /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; int save_ftrace_enabled; #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) save_processor_state(); #endif save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); hw_breakpoint_disable(); cet_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC /* * We need to put APICs in legacy mode so that we can * get timer interrupts in second kernel. kexec/kdump * paths already have calls to restore_boot_irq_mode() * in one form or other. kexec jump path also need one. */ clear_IO_APIC(); restore_boot_irq_mode(); #endif } control_page = page_address(image->control_code_page) + PAGE_SIZE; __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); if (image->type == KEXEC_TYPE_DEFAULT) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); /* * The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded * with from a table in memory. At no other time is the * descriptor table in memory accessed. 
* * I take advantage of this here by force loading the * segments, before I zap the gdt with an invalid value. */ load_segments(); /* * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ native_idt_invalidate(); native_gdt_invalidate(); /* now call it */ image->start = relocate_kernel((unsigned long)image->head, (unsigned long)page_list, image->start, image->preserve_context, cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) restore_processor_state(); #endif __ftrace_enabled_restore(save_ftrace_enabled); } /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE /* * Apply purgatory relocations. * * @pi: Purgatory to be relocated. * @section: Section relocations applying to. * @relsec: Section containing RELAs. * @symtabsec: Corresponding symtab. * * TODO: Some of the code belongs to generic code. Move that in kexec.c. */ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, const Elf_Shdr *relsec, const Elf_Shdr *symtabsec) { unsigned int i; Elf64_Rela *rel; Elf64_Sym *sym; void *location; unsigned long address, sec_base, value; const char *strtab, *name, *shstrtab; const Elf_Shdr *sechdrs; /* String & section header string table */ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; strtab = (char *)pi->ehdr + sechdrs[symtabsec->sh_link].sh_offset; shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; rel = (void *)pi->ehdr + relsec->sh_offset; pr_debug("Applying relocate section %s to %u\n", shstrtab + relsec->sh_name, relsec->sh_info); for (i = 0; i < relsec->sh_size / sizeof(*rel); i++) { /* * rel[i].r_offset contains byte offset from beginning * of section to the storage unit affected. * * This is location to update. This is temporary buffer * where section is currently loaded. This will finally be * loaded to a different address later, pointed to by * ->sh_addr. kexec takes care of moving it * (kexec_load_segment()). */ location = pi->purgatory_buf; location += section->sh_offset; location += rel[i].r_offset; /* Final address of the location */ address = section->sh_addr + rel[i].r_offset; /* * rel[i].r_info contains information about symbol table index * w.r.t which relocation must be made and type of relocation * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get * these respectively. 
*/ sym = (void *)pi->ehdr + symtabsec->sh_offset; sym += ELF64_R_SYM(rel[i].r_info); if (sym->st_name) name = strtab + sym->st_name; else name = shstrtab + sechdrs[sym->st_shndx].sh_name; pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n", name, sym->st_info, sym->st_shndx, sym->st_value, sym->st_size); if (sym->st_shndx == SHN_UNDEF) { pr_err("Undefined symbol: %s\n", name); return -ENOEXEC; } if (sym->st_shndx == SHN_COMMON) { pr_err("symbol '%s' in common section\n", name); return -ENOEXEC; } if (sym->st_shndx == SHN_ABS) sec_base = 0; else if (sym->st_shndx >= pi->ehdr->e_shnum) { pr_err("Invalid section %d for symbol %s\n", sym->st_shndx, name); return -ENOEXEC; } else sec_base = pi->sechdrs[sym->st_shndx].sh_addr; value = sym->st_value; value += sec_base; value += rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: break; case R_X86_64_64: *(u64 *)location = value; break; case R_X86_64_32: *(u32 *)location = value; if (value != *(u32 *)location) goto overflow; break; case R_X86_64_32S: *(s32 *)location = value; if ((s64)value != *(s32 *)location) goto overflow; break; case R_X86_64_PC32: case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; default: pr_err("Unknown rela relocation: %llu\n", ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } } return 0; overflow: pr_err("Overflow in relocation type %d value 0x%lx\n", (int)ELF64_R_TYPE(rel[i].r_info), value); return -ENOEXEC; } int arch_kimage_file_post_load_cleanup(struct kimage *image) { vfree(image->elf_headers); image->elf_headers = NULL; image->elf_headers_sz = 0; return kexec_image_post_load_cleanup_default(image); } #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP static int kexec_mark_range(unsigned long start, unsigned long end, bool protect) { struct page *page; unsigned int nr_pages; /* * For physical range: [start, end]. We must skip the unassigned * crashk resource with zero-valued "end" member. */ if (!end || start > end) return 0; page = pfn_to_page(start >> PAGE_SHIFT); nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; if (protect) return set_pages_ro(page, nr_pages); else return set_pages_rw(page, nr_pages); } static void kexec_mark_crashkres(bool protect) { unsigned long control; kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect); /* Don't touch the control code page used in crash_kexec().*/ control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page)); /* Control code page is located in the 2nd page. */ kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect); control += KEXEC_CONTROL_PAGE_SIZE; kexec_mark_range(control, crashk_res.end, protect); } void arch_kexec_protect_crashkres(void) { kexec_mark_crashkres(true); } void arch_kexec_unprotect_crashkres(void) { kexec_mark_crashkres(false); } #endif /* * During a traditional boot under SME, SME will encrypt the kernel, * so the SME kexec kernel also needs to be un-encrypted in order to * replicate a normal SME boot. * * During a traditional boot under SEV, the kernel has already been * loaded encrypted, so the SEV kexec kernel needs to be encrypted in * order to replicate a normal SEV boot. */ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) return 0; /* * If host memory encryption is active we need to be sure that kexec * pages are not encrypted because when we boot to the new kernel the * pages won't be accessed encrypted (initially). 
*/ return set_memory_decrypted((unsigned long)vaddr, pages); } void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) return; /* * If host memory encryption is active we need to reset the pages back * to being an encrypted mapping before freeing them. */ set_memory_encrypted((unsigned long)vaddr, pages); }
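/*
 * Editor's note (illustrative, not from the original file): the relocation
 * cases in arch_kexec_apply_relocations_add() above implement the usual
 * x86-64 ELF formulas, where S is the symbol value, A the addend and P the
 * final address of the storage unit being patched:
 *
 *	R_X86_64_64    word64  S + A
 *	R_X86_64_32    word32  S + A      (must still fit when zero-extended)
 *	R_X86_64_32S   word32  S + A      (must still fit when sign-extended)
 *	R_X86_64_PC32  word32  S + A - P  (PLT32 is treated the same here)
 *
 * For example, with S = 0x100000, A = -4 and P = 0x100100, a PC32 relocation
 * stores 0x100000 - 4 - 0x100100 = -0x104 as a 32-bit value.  The
 * "value != *(u32 *)location" style checks are how the code detects that a
 * 32-bit field could not represent the computed value.
 */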
// SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC Tx data buffering. * * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include "ar-internal.h" static atomic_t rxrpc_txbuf_debug_ids; atomic_t rxrpc_nr_txbuf; /* * Allocate and partially initialise a data transmission buffer. */ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp) { struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; size_t total, hoff; void *buf; txb = kmalloc(sizeof(*txb), gfp); if (!txb) return NULL; hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); total = hoff + sizeof(*whdr) + data_size; data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, data_align); mutex_unlock(&call->conn->tx_data_alloc_lock); if (!buf) { kfree(txb); return NULL; } whdr = buf + hoff; INIT_LIST_HEAD(&txb->call_link); INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); txb->last_sent = KTIME_MIN; txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->space = data_size; txb->len = 0; txb->offset = sizeof(*whdr); txb->flags = call->conn->out_clientflag; txb->ack_why = 0; txb->seq = call->tx_prepared + 1; txb->serial = 0; txb->cksum = 0; txb->nr_kvec = 1; txb->kvec[0].iov_base = whdr; txb->kvec[0].iov_len = sizeof(*whdr); whdr->epoch = htonl(call->conn->proto.epoch); whdr->cid = htonl(call->cid); whdr->callNumber = htonl(call->call_id); whdr->seq = htonl(txb->seq); whdr->type = RXRPC_PACKET_TYPE_DATA; whdr->flags = 0; whdr->userStatus = 0; whdr->securityIndex = call->security_ix; whdr->_rsvd = 0; whdr->serviceId = htons(call->dest_srx.srx_service); trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, rxrpc_txbuf_alloc_data); atomic_inc(&rxrpc_nr_txbuf); return txb; } /* * Allocate and partially initialise an ACK packet. */ struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size) { struct rxrpc_wire_header *whdr; struct rxrpc_acktrailer *trailer; struct rxrpc_ackpacket *ack; struct rxrpc_txbuf *txb; gfp_t gfp = rcu_read_lock_held() ?
GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; void *buf, *buf2 = NULL; u8 *filler; txb = kmalloc(sizeof(*txb), gfp); if (!txb) return NULL; buf = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); if (!buf) { kfree(txb); return NULL; } if (sack_size) { buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); if (!buf2) { page_frag_free(buf); kfree(txb); return NULL; } } whdr = buf; ack = buf + sizeof(*whdr); filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; INIT_LIST_HEAD(&txb->call_link); INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->space = 0; txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer); txb->offset = 0; txb->flags = call->conn->out_clientflag; txb->ack_rwind = 0; txb->seq = 0; txb->serial = 0; txb->cksum = 0; txb->nr_kvec = 3; txb->kvec[0].iov_base = whdr; txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack); txb->kvec[1].iov_base = buf2; txb->kvec[1].iov_len = sack_size; txb->kvec[2].iov_base = filler; txb->kvec[2].iov_len = 3 + sizeof(*trailer); whdr->epoch = htonl(call->conn->proto.epoch); whdr->cid = htonl(call->cid); whdr->callNumber = htonl(call->call_id); whdr->seq = 0; whdr->type = RXRPC_PACKET_TYPE_ACK; whdr->flags = 0; whdr->userStatus = 0; whdr->securityIndex = call->security_ix; whdr->_rsvd = 0; whdr->serviceId = htons(call->dest_srx.srx_service); get_page(virt_to_head_page(trailer)); trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, rxrpc_txbuf_alloc_ack); atomic_inc(&rxrpc_nr_txbuf); return txb; } void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) { int r; __refcount_inc(&txb->ref, &r); trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r + 1, what); } void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) { int r = refcount_read(&txb->ref); trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what); } static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) { int i; trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); for (i = 0; i < txb->nr_kvec; i++) if (txb->kvec[i].iov_base) page_frag_free(txb->kvec[i].iov_base); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); } void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) { unsigned int debug_id, call_debug_id; rxrpc_seq_t seq; bool dead; int r; if (txb) { debug_id = txb->debug_id; call_debug_id = txb->call_debug_id; seq = txb->seq; dead = __refcount_dec_and_test(&txb->ref, &r); trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what); if (dead) rxrpc_free_txbuf(txb); } } /* * Shrink the transmit buffer. 
*/ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) { struct rxrpc_txbuf *txb; rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack); bool wake = false; _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); while ((txb = list_first_entry_or_null(&call->tx_buffer, struct rxrpc_txbuf, call_link))) { hard_ack = smp_load_acquire(&call->acks_hard_ack); if (before(hard_ack, txb->seq)) break; if (txb->seq != call->tx_bottom + 1) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); smp_store_release(&call->tx_bottom, call->tx_bottom + 1); list_del_rcu(&txb->call_link); trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); if (after(call->acks_hard_ack, call->tx_bottom + 128)) wake = true; } if (wake) wake_up(&call->waitq); }
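/*
 * Editor's note (worked example, not part of the original file): the "hoff"
 * computation in rxrpc_alloc_data_txbuf() above exists so that the payload
 * following the wire header ends up data_align-aligned.  Assuming a 28-byte
 * wire header and data_align = 64:
 *
 *	hoff  = round_up(28, 64) - 28 = 36
 *	total = 36 + 28 + data_size
 *
 * so the header is placed at offset 36 within the fragment and the payload
 * begins at offset 64, i.e. on the requested alignment boundary, provided
 * the fragment itself is allocated data_align-aligned, which is what
 * page_frag_alloc_align() is asked to guarantee.
 */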
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/unicode.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Handler routines for unicode strings */ #include <linux/types.h> #include <linux/nls.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" /* Fold the case of a unicode char, given the 16 bit value */ /* Returns folded char, or 0 if ignorable */ static inline u16 case_fold(u16 c) { u16 tmp; tmp = hfsplus_case_fold_table[c >> 8]; if (tmp) tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; else tmp = c; return tmp; } /* Compare unicode strings, return values like normal strcmp */ int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, const struct hfsplus_unistr *s2) { u16 len1, len2, c1, c2; const hfsplus_unichr *p1, *p2; len1 = be16_to_cpu(s1->length); len2 = be16_to_cpu(s2->length); p1 = s1->unicode; p2 = s2->unicode; while (1) { c1 = c2 = 0; while (len1 && !c1) { c1 = case_fold(be16_to_cpu(*p1)); p1++; len1--; } while (len2 && !c2) { c2 = case_fold(be16_to_cpu(*p2)); p2++; len2--; } if (c1 != c2) return (c1 < c2) ?
-1 : 1; if (!c1 && !c2) return 0; } } /* Compare names as a sequence of 16-bit unsigned integers */ int hfsplus_strcmp(const struct hfsplus_unistr *s1, const struct hfsplus_unistr *s2) { u16 len1, len2, c1, c2; const hfsplus_unichr *p1, *p2; int len; len1 = be16_to_cpu(s1->length); len2 = be16_to_cpu(s2->length); p1 = s1->unicode; p2 = s2->unicode; for (len = min(len1, len2); len > 0; len--) { c1 = be16_to_cpu(*p1); c2 = be16_to_cpu(*p2); if (c1 != c2) return c1 < c2 ? -1 : 1; p1++; p2++; } return len1 < len2 ? -1 : len1 > len2 ? 1 : 0; } #define Hangul_SBase 0xac00 #define Hangul_LBase 0x1100 #define Hangul_VBase 0x1161 #define Hangul_TBase 0x11a7 #define Hangul_SCount 11172 #define Hangul_LCount 19 #define Hangul_VCount 21 #define Hangul_TCount 28 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) { int i, s, e; s = 1; e = p[1]; if (!e || cc < p[s * 2] || cc > p[e * 2]) return NULL; do { i = (s + e) / 2; if (cc > p[i * 2]) s = i + 1; else if (cc < p[i * 2]) e = i - 1; else return hfsplus_compose_table + p[i * 2 + 1]; } while (s <= e); return NULL; } int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) { const hfsplus_unichr *ip; struct nls_table *nls = HFSPLUS_SB(sb)->nls; u8 *op; u16 cc, c0, c1; u16 *ce1, *ce2; int i, len, ustrlen, res, compose; op = astr; ip = ustr->unicode; ustrlen = be16_to_cpu(ustr->length); len = *len_p; ce1 = NULL; compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); while (ustrlen > 0) { c0 = be16_to_cpu(*ip++); ustrlen--; /* search for single decomposed char */ if (likely(compose)) ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); if (ce1) cc = ce1[0]; else cc = 0; if (cc) { /* start of a possibly decomposed Hangul char */ if (cc != 0xffff) goto done; if (!ustrlen) goto same; c1 = be16_to_cpu(*ip) - Hangul_VBase; if (c1 < Hangul_VCount) { /* compose the Hangul char */ cc = (c0 - Hangul_LBase) * Hangul_VCount; cc = (cc + c1) * Hangul_TCount; cc += Hangul_SBase; ip++; ustrlen--; if (!ustrlen) goto done; c1 = be16_to_cpu(*ip) - Hangul_TBase; if (c1 > 0 && c1 < Hangul_TCount) { cc += c1; ip++; ustrlen--; } goto done; } } while (1) { /* main loop for common case of not composed chars */ if (!ustrlen) goto same; c1 = be16_to_cpu(*ip); if (likely(compose)) ce1 = hfsplus_compose_lookup( hfsplus_compose_table, c1); if (ce1) break; switch (c0) { case 0: c0 = 0x2400; break; case '/': c0 = ':'; break; } res = nls->uni2char(c0, op, len); if (res < 0) { if (res == -ENAMETOOLONG) goto out; *op = '?'; res = 1; } op += res; len -= res; c0 = c1; ip++; ustrlen--; } ce2 = hfsplus_compose_lookup(ce1, c0); if (ce2) { i = 1; while (i < ustrlen) { ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i])); if (!ce1) break; i++; ce2 = ce1; } cc = ce2[0]; if (cc) { ip += i; ustrlen -= i; goto done; } } same: switch (c0) { case 0: cc = 0x2400; break; case '/': cc = ':'; break; default: cc = c0; } done: res = nls->uni2char(cc, op, len); if (res < 0) { if (res == -ENAMETOOLONG) goto out; *op = '?'; res = 1; } op += res; len -= res; } res = 0; out: *len_p = (char *)op - astr; return res; } /* * Convert one or more ASCII characters into a single unicode character. * Returns the number of ASCII characters corresponding to the unicode char. 
*/ static inline int asc2unichar(struct super_block *sb, const char *astr, int len, wchar_t *uc) { int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); if (size <= 0) { *uc = '?'; size = 1; } switch (*uc) { case 0x2400: *uc = 0; break; case ':': *uc = '/'; break; } return size; } /* Decomposes a non-Hangul unicode character. */ static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size) { int off; off = hfsplus_decompose_table[(uc >> 12) & 0xf]; if (off == 0 || off == 0xffff) return NULL; off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; if (!off) return NULL; off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; if (!off) return NULL; off = hfsplus_decompose_table[off + (uc & 0xf)]; *size = off & 3; if (*size == 0) return NULL; return hfsplus_decompose_table + (off / 4); } /* * Try to decompose a unicode character as Hangul. Return 0 if @uc is not * precomposed Hangul, otherwise return the length of the decomposition. * * This function was adapted from sample code from the Unicode Standard * Annex #15: Unicode Normalization Forms, version 3.2.0. * * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed * under the Terms of Use in http://www.unicode.org/copyright.html. */ static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result) { int index; int l, v, t; index = uc - Hangul_SBase; if (index < 0 || index >= Hangul_SCount) return 0; l = Hangul_LBase + index / Hangul_NCount; v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount; t = Hangul_TBase + index % Hangul_TCount; result[0] = l; result[1] = v; if (t != Hangul_TBase) { result[2] = t; return 3; } return 2; } /* Decomposes a single unicode character. */ static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer) { u16 *result; /* Hangul is handled separately */ result = hangul_buffer; *size = hfsplus_try_decompose_hangul(uc, result); if (*size == 0) result = hfsplus_decompose_nonhangul(uc, size); return result; } int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, int max_unistr_len, const char *astr, int len) { int size, dsize, decompose; u16 *dstr, outlen = 0; wchar_t c; u16 dhangul[3]; decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); while (outlen < max_unistr_len && len > 0) { size = asc2unichar(sb, astr, len, &c); if (decompose) dstr = decompose_unichar(c, &dsize, dhangul); else dstr = NULL; if (dstr) { if (outlen + dsize > max_unistr_len) break; do { ustr->unicode[outlen++] = cpu_to_be16(*dstr++); } while (--dsize > 0); } else ustr->unicode[outlen++] = cpu_to_be16(c); astr += size; len -= size; } ustr->length = cpu_to_be16(outlen); if (len > 0) return -ENAMETOOLONG; return 0; } /* * Hash a string to an integer as appropriate for the HFS+ filesystem. * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. 
*/ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) { struct super_block *sb = dentry->d_sb; const char *astr; const u16 *dstr; int casefold, decompose, size, len; unsigned long hash; wchar_t c; u16 c2; u16 dhangul[3]; casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); hash = init_name_hash(dentry); astr = str->name; len = str->len; while (len > 0) { int dsize; size = asc2unichar(sb, astr, len, &c); astr += size; len -= size; if (decompose) dstr = decompose_unichar(c, &dsize, dhangul); else dstr = NULL; if (dstr) { do { c2 = *dstr++; if (casefold) c2 = case_fold(c2); if (!casefold || c2) hash = partial_name_hash(c2, hash); } while (--dsize > 0); } else { c2 = c; if (casefold) c2 = case_fold(c2); if (!casefold || c2) hash = partial_name_hash(c2, hash); } } str->hash = end_name_hash(hash); return 0; } /* * Compare strings with HFS+ filename ordering. * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ int hfsplus_compare_dentry(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct super_block *sb = dentry->d_sb; int casefold, decompose, size; int dsize1, dsize2, len1, len2; const u16 *dstr1, *dstr2; const char *astr1, *astr2; u16 c1, c2; wchar_t c; u16 dhangul_1[3], dhangul_2[3]; casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); astr1 = str; len1 = len; astr2 = name->name; len2 = name->len; dsize1 = dsize2 = 0; dstr1 = dstr2 = NULL; while (len1 > 0 && len2 > 0) { if (!dsize1) { size = asc2unichar(sb, astr1, len1, &c); astr1 += size; len1 -= size; if (decompose) dstr1 = decompose_unichar(c, &dsize1, dhangul_1); if (!decompose || !dstr1) { c1 = c; dstr1 = &c1; dsize1 = 1; } } if (!dsize2) { size = asc2unichar(sb, astr2, len2, &c); astr2 += size; len2 -= size; if (decompose) dstr2 = decompose_unichar(c, &dsize2, dhangul_2); if (!decompose || !dstr2) { c2 = c; dstr2 = &c2; dsize2 = 1; } } c1 = *dstr1; c2 = *dstr2; if (casefold) { c1 = case_fold(c1); if (!c1) { dstr1++; dsize1--; continue; } c2 = case_fold(c2); if (!c2) { dstr2++; dsize2--; continue; } } if (c1 < c2) return -1; else if (c1 > c2) return 1; dstr1++; dsize1--; dstr2++; dsize2--; } if (len1 < len2) return -1; if (len1 > len2) return 1; return 0; }
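/*
 * Editor's note (worked example, not part of the original file): the
 * arithmetic in hfsplus_try_decompose_hangul() above follows the standard
 * Unicode Hangul decomposition.  With Hangul_NCount = 21 * 28 = 588, the
 * syllable U+AC01 decomposes as:
 *
 *	index = 0xac01 - 0xac00 = 1
 *	l = 0x1100 + 1 / 588        = 0x1100
 *	v = 0x1161 + (1 % 588) / 28 = 0x1161
 *	t = 0x11a7 + 1 % 28         = 0x11a8
 *
 * Since t != Hangul_TBase, the result is the three jamo
 * { 0x1100, 0x1161, 0x11a8 }.  For U+AC00 the trailing-consonant index is
 * zero, so only { 0x1100, 0x1161 } is emitted.
 */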
// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" #include "xfs_rmap.h" #include "xfs_refcount.h" #include "xfs_bmap.h" #include "xfs_alloc.h" #include "xfs_buf.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_trans_priv.h" static struct kmem_cache *xfs_defer_pending_cache; /* * Deferred Operations in XFS * * Due to the way locking rules work in XFS, certain transactions (block * mapping and unmapping, typically) have permanent reservations so that * we can roll the transaction to adhere to AG locking order rules and * to unlock buffers between metadata updates. Prior to rmap/reflink, * the mapping code had a mechanism to perform these deferrals for * extents that were going to be freed; this code makes that facility * more generic. * * When adding the reverse mapping and reflink features, it became * necessary to perform complex remapping multi-transactions to comply * with AG locking order rules, and to be able to spread a single * refcount update operation (an operation on an n-block extent can * update as many as n records!) among multiple transactions.
XFS can * roll a transaction to facilitate this, but using this facility * requires us to log "intent" items in case log recovery needs to * redo the operation, and to log "done" items to indicate that redo * is not necessary. * * Deferred work is tracked in xfs_defer_pending items. Each pending * item tracks one type of deferred work. Incoming work items (which * have not yet had an intent logged) are attached to a pending item * on the dop_intake list, where they wait for the caller to finish * the deferred operations. * * Finishing a set of deferred operations is an involved process. To * start, we define "rolling a deferred-op transaction" as follows: * * > For each xfs_defer_pending item on the dop_intake list, * - Sort the work items in AG order. XFS locking * order rules require us to lock buffers in AG order. * - Create a log intent item for that type. * - Attach it to the pending item. * - Move the pending item from the dop_intake list to the * dop_pending list. * > Roll the transaction. * * NOTE: To avoid exceeding the transaction reservation, we limit the * number of items that we attach to a given xfs_defer_pending. * * The actual finishing process looks like this: * * > For each xfs_defer_pending in the dop_pending list, * - Roll the deferred-op transaction as above. * - Create a log done item for that type, and attach it to the * log intent item. * - For each work item attached to the log intent item, * * Perform the described action. * * Attach the work item to the log done item. * * If the result of doing the work was -EAGAIN, ->finish work * wants a new transaction. See the "Requesting a Fresh * Transaction while Finishing Deferred Work" section below for * details. * * The key here is that we must log an intent item for all pending * work items every time we roll the transaction, and that we must log * a done item as soon as the work is completed. With this mechanism * we can perform complex remapping operations, chaining intent items * as needed. * * Requesting a Fresh Transaction while Finishing Deferred Work * * If ->finish_item decides that it needs a fresh transaction to * finish the work, it must ask its caller (xfs_defer_finish) for a * continuation. The most likely cause of this circumstance are the * refcount adjust functions deciding that they've logged enough items * to be at risk of exceeding the transaction reservation. * * To get a fresh transaction, we want to log the existing log done * item to prevent the log intent item from replaying, immediately log * a new log intent item with the unfinished work items, roll the * transaction, and re-call ->finish_item wherever it left off. The * log done item and the new log intent item must be in the same * transaction or atomicity cannot be guaranteed; defer_finish ensures * that this happens. * * This requires some coordination between ->finish_item and * defer_finish. Upon deciding to request a new transaction, * ->finish_item should update the current work item to reflect the * unfinished work. Next, it should reset the log done item's list * count to the number of items finished, and return -EAGAIN. * defer_finish sees the -EAGAIN, logs the new log intent item * with the remaining work items, and leaves the xfs_defer_pending * item at the head of the dop_work queue. Then it rolls the * transaction and picks up processing where it left off. It is * required that ->finish_item must be careful to leave enough * transaction reservation to fit the new log intent item. 
* * This is an example of remapping the extent (E, E+B) into file X at * offset A and dealing with the extent (C, C+B) already being mapped * there: * +-------------------------------------------------+ * | Unmap file X startblock C offset A length B | t0 * | Intent to reduce refcount for extent (C, B) | * | Intent to remove rmap (X, C, A, B) | * | Intent to free extent (D, 1) (bmbt block) | * | Intent to map (X, A, B) at startblock E | * +-------------------------------------------------+ * | Map file X startblock E offset A length B | t1 * | Done mapping (X, E, A, B) | * | Intent to increase refcount for extent (E, B) | * | Intent to add rmap (X, E, A, B) | * +-------------------------------------------------+ * | Reduce refcount for extent (C, B) | t2 * | Done reducing refcount for extent (C, 9) | * | Intent to reduce refcount for extent (C+9, B-9) | * | (ran out of space after 9 refcount updates) | * +-------------------------------------------------+ * | Reduce refcount for extent (C+9, B-9) | t3 * | Done reducing refcount for extent (C+9, B-9) | * | Increase refcount for extent (E, B) | * | Done increasing refcount for extent (E, B) | * | Intent to free extent (C, B) | * | Intent to free extent (F, 1) (refcountbt block) | * | Intent to remove rmap (F, 1, REFC) | * +-------------------------------------------------+ * | Remove rmap (X, C, A, B) | t4 * | Done removing rmap (X, C, A, B) | * | Add rmap (X, E, A, B) | * | Done adding rmap (X, E, A, B) | * | Remove rmap (F, 1, REFC) | * | Done removing rmap (F, 1, REFC) | * +-------------------------------------------------+ * | Free extent (C, B) | t5 * | Done freeing extent (C, B) | * | Free extent (D, 1) | * | Done freeing extent (D, 1) | * | Free extent (F, 1) | * | Done freeing extent (F, 1) | * +-------------------------------------------------+ * * If we should crash before t2 commits, log recovery replays * the following intent items: * * - Intent to reduce refcount for extent (C, B) * - Intent to remove rmap (X, C, A, B) * - Intent to free extent (D, 1) (bmbt block) * - Intent to increase refcount for extent (E, B) * - Intent to add rmap (X, E, A, B) * * In the process of recovering, it should also generate and take care * of these intent items: * * - Intent to free extent (C, B) * - Intent to free extent (F, 1) (refcountbt block) * - Intent to remove rmap (F, 1, REFC) * * Note that the continuation requested between t2 and t3 is likely to * reoccur. */ STATIC struct xfs_log_item * xfs_defer_barrier_create_intent( struct xfs_trans *tp, struct list_head *items, unsigned int count, bool sort) { return NULL; } STATIC void xfs_defer_barrier_abort_intent( struct xfs_log_item *intent) { /* empty */ } STATIC struct xfs_log_item * xfs_defer_barrier_create_done( struct xfs_trans *tp, struct xfs_log_item *intent, unsigned int count) { return NULL; } STATIC int xfs_defer_barrier_finish_item( struct xfs_trans *tp, struct xfs_log_item *done, struct list_head *item, struct xfs_btree_cur **state) { ASSERT(0); return -EFSCORRUPTED; } STATIC void xfs_defer_barrier_cancel_item( struct list_head *item) { ASSERT(0); } static const struct xfs_defer_op_type xfs_barrier_defer_type = { .max_items = 1, .create_intent = xfs_defer_barrier_create_intent, .abort_intent = xfs_defer_barrier_abort_intent, .create_done = xfs_defer_barrier_create_done, .finish_item = xfs_defer_barrier_finish_item, .cancel_item = xfs_defer_barrier_cancel_item, }; /* Create a log intent done item for a log intent item.
*/ static inline void xfs_defer_create_done( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { struct xfs_log_item *lip; /* If there is no log intent item, there can be no log done item. */ if (!dfp->dfp_intent) return; /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: * * 1.) releases the log intent item and frees the log done item * 2.) shuts down the filesystem */ tp->t_flags |= XFS_TRANS_DIRTY; lip = dfp->dfp_ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count); if (!lip) return; tp->t_flags |= XFS_TRANS_HAS_INTENT_DONE; xfs_trans_add_item(tp, lip); set_bit(XFS_LI_DIRTY, &lip->li_flags); dfp->dfp_done = lip; } /* * Ensure there's a log intent item associated with this deferred work item if * the operation must be restarted on crash. Returns 1 if there's a log item; * 0 if there isn't; or a negative errno. */ static int xfs_defer_create_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp, bool sort) { struct xfs_log_item *lip; if (dfp->dfp_intent) return 1; lip = dfp->dfp_ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort); if (!lip) return 0; if (IS_ERR(lip)) return PTR_ERR(lip); tp->t_flags |= XFS_TRANS_DIRTY; xfs_trans_add_item(tp, lip); set_bit(XFS_LI_DIRTY, &lip->li_flags); dfp->dfp_intent = lip; return 1; } /* * For each pending item in the intake list, log its intent item and the * associated extents, then add the entire intake list to the end of * the pending list. * * Returns 1 if at least one log item was associated with the deferred work; * 0 if there are no log items; or a negative errno. */ static int xfs_defer_create_intents( struct xfs_trans *tp) { struct xfs_defer_pending *dfp; int ret = 0; list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { int ret2; trace_xfs_defer_create_intent(tp->t_mountp, dfp); ret2 = xfs_defer_create_intent(tp, dfp, true); if (ret2 < 0) return ret2; ret |= ret2; } return ret; } static inline void xfs_defer_pending_abort( struct xfs_mount *mp, struct xfs_defer_pending *dfp) { trace_xfs_defer_pending_abort(mp, dfp); if (dfp->dfp_intent && !dfp->dfp_done) { dfp->dfp_ops->abort_intent(dfp->dfp_intent); dfp->dfp_intent = NULL; } } static inline void xfs_defer_pending_cancel_work( struct xfs_mount *mp, struct xfs_defer_pending *dfp) { struct list_head *pwi; struct list_head *n; trace_xfs_defer_cancel_list(mp, dfp); list_del(&dfp->dfp_list); list_for_each_safe(pwi, n, &dfp->dfp_work) { list_del(pwi); dfp->dfp_count--; trace_xfs_defer_cancel_item(mp, dfp, pwi); dfp->dfp_ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); kmem_cache_free(xfs_defer_pending_cache, dfp); } STATIC void xfs_defer_pending_abort_list( struct xfs_mount *mp, struct list_head *dop_list) { struct xfs_defer_pending *dfp; /* Abort intent items that don't have a done item. */ list_for_each_entry(dfp, dop_list, dfp_list) xfs_defer_pending_abort(mp, dfp); } /* Abort all the intents that were committed. */ STATIC void xfs_defer_trans_abort( struct xfs_trans *tp, struct list_head *dop_pending) { trace_xfs_defer_trans_abort(tp, _RET_IP_); xfs_defer_pending_abort_list(tp->t_mountp, dop_pending); } /* * Capture resources that the caller said not to release ("held") when the * transaction commits. Caller is responsible for zero-initializing @dres. 
*/ static int xfs_defer_save_resources( struct xfs_defer_resources *dres, struct xfs_trans *tp) { struct xfs_buf_log_item *bli; struct xfs_inode_log_item *ili; struct xfs_log_item *lip; BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS); list_for_each_entry(lip, &tp->t_items, li_trans) { switch (lip->li_type) { case XFS_LI_BUF: bli = container_of(lip, struct xfs_buf_log_item, bli_item); if (bli->bli_flags & XFS_BLI_HOLD) { if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) { ASSERT(0); return -EFSCORRUPTED; } if (bli->bli_flags & XFS_BLI_ORDERED) dres->dr_ordered |= (1U << dres->dr_bufs); else xfs_trans_dirty_buf(tp, bli->bli_buf); dres->dr_bp[dres->dr_bufs++] = bli->bli_buf; } break; case XFS_LI_INODE: ili = container_of(lip, struct xfs_inode_log_item, ili_item); if (ili->ili_lock_flags == 0) { if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) { ASSERT(0); return -EFSCORRUPTED; } xfs_trans_log_inode(tp, ili->ili_inode, XFS_ILOG_CORE); dres->dr_ip[dres->dr_inos++] = ili->ili_inode; } break; default: break; } } return 0; } /* Attach the held resources to the transaction. */ static void xfs_defer_restore_resources( struct xfs_trans *tp, struct xfs_defer_resources *dres) { unsigned short i; /* Rejoin the joined inodes. */ for (i = 0; i < dres->dr_inos; i++) xfs_trans_ijoin(tp, dres->dr_ip[i], 0); /* Rejoin the buffers and dirty them so the log moves forward. */ for (i = 0; i < dres->dr_bufs; i++) { xfs_trans_bjoin(tp, dres->dr_bp[i]); if (dres->dr_ordered & (1U << i)) xfs_trans_ordered_buf(tp, dres->dr_bp[i]); xfs_trans_bhold(tp, dres->dr_bp[i]); } } /* Roll a transaction so we can do some deferred op processing. */ STATIC int xfs_defer_trans_roll( struct xfs_trans **tpp) { struct xfs_defer_resources dres = { }; int error; error = xfs_defer_save_resources(&dres, *tpp); if (error) return error; trace_xfs_defer_trans_roll(*tpp, _RET_IP_); /* * Roll the transaction. Rolling always given a new transaction (even * if committing the old one fails!) to hand back to the caller, so we * join the held resources to the new transaction so that we always * return with the held resources joined to @tpp, no matter what * happened. */ error = xfs_trans_roll(tpp); xfs_defer_restore_resources(*tpp, &dres); if (error) trace_xfs_defer_trans_roll_error(*tpp, error); return error; } /* * Free up any items left in the list. */ static void xfs_defer_cancel_list( struct xfs_mount *mp, struct list_head *dop_list) { struct xfs_defer_pending *dfp; struct xfs_defer_pending *pli; /* * Free the pending items. Caller should already have arranged * for the intent items to be released. */ list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) xfs_defer_pending_cancel_work(mp, dfp); } static inline void xfs_defer_relog_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { struct xfs_log_item *lip; xfs_defer_create_done(tp, dfp); lip = dfp->dfp_ops->relog_intent(tp, dfp->dfp_intent, dfp->dfp_done); if (lip) { xfs_trans_add_item(tp, lip); set_bit(XFS_LI_DIRTY, &lip->li_flags); } dfp->dfp_done = NULL; dfp->dfp_intent = lip; } /* * Prevent a log intent item from pinning the tail of the log by logging a * done item to release the intent item; and then log a new intent item. * The caller should provide a fresh transaction and roll it after we're done. 
*/ static void xfs_defer_relog( struct xfs_trans **tpp, struct list_head *dfops) { struct xlog *log = (*tpp)->t_mountp->m_log; struct xfs_defer_pending *dfp; xfs_lsn_t threshold_lsn = NULLCOMMITLSN; ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); list_for_each_entry(dfp, dfops, dfp_list) { /* * If the log intent item for this deferred op is not a part of * the current log checkpoint, relog the intent item to keep * the log tail moving forward. We're ok with this being racy * because an incorrect decision means we'll be a little slower * at pushing the tail. */ if (dfp->dfp_intent == NULL || xfs_log_item_in_current_chkpt(dfp->dfp_intent)) continue; /* * Figure out where we need the tail to be in order to maintain * the minimum required free space in the log. Only sample * the log threshold once per call. */ if (threshold_lsn == NULLCOMMITLSN) { threshold_lsn = xlog_grant_push_threshold(log, 0); if (threshold_lsn == NULLCOMMITLSN) break; } if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0) continue; trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp); XFS_STATS_INC((*tpp)->t_mountp, defer_relog); xfs_defer_relog_intent(*tpp, dfp); } } /* * Log an intent-done item for the first pending intent, and finish the work * items. */ int xfs_defer_finish_one( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { const struct xfs_defer_op_type *ops = dfp->dfp_ops; struct xfs_btree_cur *state = NULL; struct list_head *li, *n; int error; trace_xfs_defer_pending_finish(tp->t_mountp, dfp); xfs_defer_create_done(tp, dfp); list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; trace_xfs_defer_finish_item(tp->t_mountp, dfp, li); error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { int ret; /* * Caller wants a fresh transaction; put the work item * back on the list and log a new log intent item to * replace the old one. See "Requesting a Fresh * Transaction while Finishing Deferred Work" above. */ list_add(li, &dfp->dfp_work); dfp->dfp_count++; dfp->dfp_done = NULL; dfp->dfp_intent = NULL; ret = xfs_defer_create_intent(tp, dfp, false); if (ret < 0) error = ret; } if (error) goto out; } /* Done with the dfp, free it. */ list_del(&dfp->dfp_list); kmem_cache_free(xfs_defer_pending_cache, dfp); out: if (ops->finish_cleanup) ops->finish_cleanup(tp, state, error); return error; } /* Move all paused deferred work from @tp to @paused_list. */ static void xfs_defer_isolate_paused( struct xfs_trans *tp, struct list_head *paused_list) { struct xfs_defer_pending *dfp; struct xfs_defer_pending *pli; list_for_each_entry_safe(dfp, pli, &tp->t_dfops, dfp_list) { if (!(dfp->dfp_flags & XFS_DEFER_PAUSED)) continue; list_move_tail(&dfp->dfp_list, paused_list); trace_xfs_defer_isolate_paused(tp->t_mountp, dfp); } } /* * Finish all the pending work. This involves logging intent items for * any work items that wandered in since the last transaction roll (if * one has even happened), rolling the transaction, and finishing the * work items in the first item on the logged-and-pending list. * * If an inode is provided, relog it to the new transaction. */ int xfs_defer_finish_noroll( struct xfs_trans **tp) { struct xfs_defer_pending *dfp = NULL; int error = 0; LIST_HEAD(dop_pending); LIST_HEAD(dop_paused); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); trace_xfs_defer_finish(*tp, _RET_IP_); /* Until we run out of pending work to finish... 
*/ while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) { /* * Deferred items that are created in the process of finishing * other deferred work items should be queued at the head of * the pending list, which puts them ahead of the deferred work * that was created by the caller. This keeps the number of * pending work items to a minimum, which decreases the amount * of time that any one intent item can stick around in memory, * pinning the log tail. */ int has_intents = xfs_defer_create_intents(*tp); xfs_defer_isolate_paused(*tp, &dop_paused); list_splice_init(&(*tp)->t_dfops, &dop_pending); if (has_intents < 0) { error = has_intents; goto out_shutdown; } if (has_intents || dfp) { error = xfs_defer_trans_roll(tp); if (error) goto out_shutdown; /* Relog intent items to keep the log moving. */ xfs_defer_relog(tp, &dop_pending); xfs_defer_relog(tp, &dop_paused); if ((*tp)->t_flags & XFS_TRANS_DIRTY) { error = xfs_defer_trans_roll(tp); if (error) goto out_shutdown; } } dfp = list_first_entry_or_null(&dop_pending, struct xfs_defer_pending, dfp_list); if (!dfp) break; error = xfs_defer_finish_one(*tp, dfp); if (error && error != -EAGAIN) goto out_shutdown; } /* Requeue the paused items in the outgoing transaction. */ list_splice_tail_init(&dop_paused, &(*tp)->t_dfops); trace_xfs_defer_finish_done(*tp, _RET_IP_); return 0; out_shutdown: list_splice_tail_init(&dop_paused, &dop_pending); xfs_defer_trans_abort(*tp, &dop_pending); xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); trace_xfs_defer_finish_error(*tp, error); xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending); xfs_defer_cancel(*tp); return error; } int xfs_defer_finish( struct xfs_trans **tp) { #ifdef DEBUG struct xfs_defer_pending *dfp; #endif int error; /* * Finish and roll the transaction once more to avoid returning to the * caller with a dirty transaction. */ error = xfs_defer_finish_noroll(tp); if (error) return error; if ((*tp)->t_flags & XFS_TRANS_DIRTY) { error = xfs_defer_trans_roll(tp); if (error) { xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); return error; } } /* Reset LOWMODE now that we've finished all the dfops. */ #ifdef DEBUG list_for_each_entry(dfp, &(*tp)->t_dfops, dfp_list) ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED); #endif (*tp)->t_flags &= ~XFS_TRANS_LOWMODE; return 0; } void xfs_defer_cancel( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; trace_xfs_defer_cancel(tp, _RET_IP_); xfs_defer_trans_abort(tp, &tp->t_dfops); xfs_defer_cancel_list(mp, &tp->t_dfops); } /* * Return the last pending work item attached to this transaction if it matches * the deferred op type. */ static inline struct xfs_defer_pending * xfs_defer_find_last( struct xfs_trans *tp, const struct xfs_defer_op_type *ops) { struct xfs_defer_pending *dfp = NULL; /* No dfops at all? */ if (list_empty(&tp->t_dfops)) return NULL; dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, dfp_list); /* Wrong type? */ if (dfp->dfp_ops != ops) return NULL; return dfp; } /* * Decide if we can add a deferred work item to the last dfops item attached * to the transaction. */ static inline bool xfs_defer_can_append( struct xfs_defer_pending *dfp, const struct xfs_defer_op_type *ops) { /* Already logged? */ if (dfp->dfp_intent) return false; /* Paused items cannot absorb more work */ if (dfp->dfp_flags & XFS_DEFER_PAUSED) return NULL; /* Already full? */ if (ops->max_items && dfp->dfp_count >= ops->max_items) return false; return true; } /* Create a new pending item at the end of the transaction list. 
*/ static inline struct xfs_defer_pending * xfs_defer_alloc( struct list_head *dfops, const struct xfs_defer_op_type *ops) { struct xfs_defer_pending *dfp; dfp = kmem_cache_zalloc(xfs_defer_pending_cache, GFP_KERNEL | __GFP_NOFAIL); dfp->dfp_ops = ops; INIT_LIST_HEAD(&dfp->dfp_work); list_add_tail(&dfp->dfp_list, dfops); return dfp; } /* Add an item for later deferred processing. */ struct xfs_defer_pending * xfs_defer_add( struct xfs_trans *tp, struct list_head *li, const struct xfs_defer_op_type *ops) { struct xfs_defer_pending *dfp = NULL; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); dfp = xfs_defer_find_last(tp, ops); if (!dfp || !xfs_defer_can_append(dfp, ops)) dfp = xfs_defer_alloc(&tp->t_dfops, ops); xfs_defer_add_item(dfp, li); trace_xfs_defer_add_item(tp->t_mountp, dfp, li); return dfp; } /* * Add a defer ops barrier to force two otherwise adjacent deferred work items * to be tracked separately and have separate log items. */ void xfs_defer_add_barrier( struct xfs_trans *tp) { struct xfs_defer_pending *dfp; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); /* If the last defer op added was a barrier, we're done. */ dfp = xfs_defer_find_last(tp, &xfs_barrier_defer_type); if (dfp) return; xfs_defer_alloc(&tp->t_dfops, &xfs_barrier_defer_type); trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL); } /* * Create a pending deferred work item to replay the recovered intent item * and add it to the list. */ void xfs_defer_start_recovery( struct xfs_log_item *lip, struct list_head *r_dfops, const struct xfs_defer_op_type *ops) { struct xfs_defer_pending *dfp = xfs_defer_alloc(r_dfops, ops); dfp->dfp_intent = lip; } /* * Cancel a deferred work item created to recover a log intent item. @dfp * will be freed after this function returns. */ void xfs_defer_cancel_recovery( struct xfs_mount *mp, struct xfs_defer_pending *dfp) { xfs_defer_pending_abort(mp, dfp); xfs_defer_pending_cancel_work(mp, dfp); } /* Replay the deferred work item created from a recovered log intent item. */ int xfs_defer_finish_recovery( struct xfs_mount *mp, struct xfs_defer_pending *dfp, struct list_head *capture_list) { const struct xfs_defer_op_type *ops = dfp->dfp_ops; int error; /* dfp is freed by recover_work and must not be accessed afterwards */ error = ops->recover_work(dfp, capture_list); if (error) trace_xlog_intent_recovery_failed(mp, ops, error); return error; } /* * Move deferred ops from one transaction to another and reset the source to * initial state. This is primarily used to carry state forward across * transaction rolls with pending dfops. */ void xfs_defer_move( struct xfs_trans *dtp, struct xfs_trans *stp) { list_splice_init(&stp->t_dfops, &dtp->t_dfops); /* * Low free space mode was historically controlled by a dfops field. * This meant that low mode state potentially carried across multiple * transaction rolls. Transfer low mode on a dfops move to preserve * that behavior. */ dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE); stp->t_flags &= ~XFS_TRANS_LOWMODE; } /* * Prepare a chain of fresh deferred ops work items to be completed later. Log * recovery requires the ability to put off until later the actual finishing * work so that it can process unfinished items recovered from the log in * correct order. * * Create and log intent items for all the work that we're capturing so that we * can be assured that the items will get replayed if the system goes down * before log recovery gets a chance to finish the work it put off. 
The entire * deferred ops state is transferred to the capture structure and the * transaction is then ready for the caller to commit it. If there are no * intent items to capture, this function returns NULL. * * If capture_ip is not NULL, the capture structure will obtain an extra * reference to the inode. */ static struct xfs_defer_capture * xfs_defer_ops_capture( struct xfs_trans *tp) { struct xfs_defer_capture *dfc; unsigned short i; int error; if (list_empty(&tp->t_dfops)) return NULL; error = xfs_defer_create_intents(tp); if (error < 0) return ERR_PTR(error); /* Create an object to capture the defer ops. */ dfc = kzalloc(sizeof(*dfc), GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&dfc->dfc_list); INIT_LIST_HEAD(&dfc->dfc_dfops); /* Move the dfops chain and transaction state to the capture struct. */ list_splice_init(&tp->t_dfops, &dfc->dfc_dfops); dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE; tp->t_flags &= ~XFS_TRANS_LOWMODE; /* Capture the remaining block reservations along with the dfops. */ dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used; dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used; /* Preserve the log reservation size. */ dfc->dfc_logres = tp->t_log_res; error = xfs_defer_save_resources(&dfc->dfc_held, tp); if (error) { /* * Resource capture should never fail, but if it does, we * still have to shut down the log and release things * properly. */ xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE); } /* * Grab extra references to the inodes and buffers because callers are * expected to release their held references after we commit the * transaction. */ for (i = 0; i < dfc->dfc_held.dr_inos; i++) { xfs_assert_ilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL); ihold(VFS_I(dfc->dfc_held.dr_ip[i])); } for (i = 0; i < dfc->dfc_held.dr_bufs; i++) xfs_buf_hold(dfc->dfc_held.dr_bp[i]); return dfc; } /* Release all resources that we used to capture deferred ops. */ void xfs_defer_ops_capture_abort( struct xfs_mount *mp, struct xfs_defer_capture *dfc) { unsigned short i; xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops); xfs_defer_cancel_list(mp, &dfc->dfc_dfops); for (i = 0; i < dfc->dfc_held.dr_bufs; i++) xfs_buf_relse(dfc->dfc_held.dr_bp[i]); for (i = 0; i < dfc->dfc_held.dr_inos; i++) xfs_irele(dfc->dfc_held.dr_ip[i]); kfree(dfc); } /* * Capture any deferred ops and commit the transaction. This is the last step * needed to finish a log intent item that we recovered from the log. If any * of the deferred ops operate on an inode, the caller must pass in that inode * so that the reference can be transferred to the capture structure. The * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling * xfs_defer_ops_continue. */ int xfs_defer_ops_capture_and_commit( struct xfs_trans *tp, struct list_head *capture_list) { struct xfs_mount *mp = tp->t_mountp; struct xfs_defer_capture *dfc; int error; /* If we don't capture anything, commit transaction and exit. */ dfc = xfs_defer_ops_capture(tp); if (IS_ERR(dfc)) { xfs_trans_cancel(tp); return PTR_ERR(dfc); } if (!dfc) return xfs_trans_commit(tp); /* Commit the transaction and add the capture structure to the list. */ error = xfs_trans_commit(tp); if (error) { xfs_defer_ops_capture_abort(mp, dfc); return error; } list_add_tail(&dfc->dfc_list, capture_list); return 0; } /* * Attach a chain of captured deferred ops to a new transaction and free the * capture structure. If an inode was captured, it will be passed back to the * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0. 
* The caller now owns the inode reference. */ void xfs_defer_ops_continue( struct xfs_defer_capture *dfc, struct xfs_trans *tp, struct xfs_defer_resources *dres) { unsigned int i; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); /* Lock the captured resources to the new transaction. */ if (dfc->dfc_held.dr_inos == 2) xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL, dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL); else if (dfc->dfc_held.dr_inos == 1) xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL); for (i = 0; i < dfc->dfc_held.dr_bufs; i++) xfs_buf_lock(dfc->dfc_held.dr_bp[i]); /* Join the captured resources to the new transaction. */ xfs_defer_restore_resources(tp, &dfc->dfc_held); memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources)); dres->dr_bufs = 0; /* Move captured dfops chain and state to the transaction. */ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); tp->t_flags |= dfc->dfc_tpflags; kfree(dfc); } /* Release the resources captured and continued during recovery. */ void xfs_defer_resources_rele( struct xfs_defer_resources *dres) { unsigned short i; for (i = 0; i < dres->dr_inos; i++) { xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL); xfs_irele(dres->dr_ip[i]); dres->dr_ip[i] = NULL; } for (i = 0; i < dres->dr_bufs; i++) { xfs_buf_relse(dres->dr_bp[i]); dres->dr_bp[i] = NULL; } dres->dr_inos = 0; dres->dr_bufs = 0; dres->dr_ordered = 0; } static inline int __init xfs_defer_init_cache(void) { xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending", sizeof(struct xfs_defer_pending), 0, 0, NULL); return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM; } static inline void xfs_defer_destroy_cache(void) { kmem_cache_destroy(xfs_defer_pending_cache); xfs_defer_pending_cache = NULL; } /* Set up caches for deferred work items. */ int __init xfs_defer_init_item_caches(void) { int error; error = xfs_defer_init_cache(); if (error) return error; error = xfs_rmap_intent_init_cache(); if (error) goto err; error = xfs_refcount_intent_init_cache(); if (error) goto err; error = xfs_bmap_intent_init_cache(); if (error) goto err; error = xfs_extfree_intent_init_cache(); if (error) goto err; error = xfs_attr_intent_init_cache(); if (error) goto err; return 0; err: xfs_defer_destroy_item_caches(); return error; } /* Destroy all the deferred work item caches, if they've been allocated. */ void xfs_defer_destroy_item_caches(void) { xfs_attr_intent_destroy_cache(); xfs_extfree_intent_destroy_cache(); xfs_bmap_intent_destroy_cache(); xfs_refcount_intent_destroy_cache(); xfs_rmap_intent_destroy_cache(); xfs_defer_destroy_cache(); } /* * Mark a deferred work item so that it will be requeued indefinitely without * being finished. Caller must ensure there are no data dependencies on this * work item in the meantime. */ void xfs_defer_item_pause( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { ASSERT(!(dfp->dfp_flags & XFS_DEFER_PAUSED)); dfp->dfp_flags |= XFS_DEFER_PAUSED; trace_xfs_defer_item_pause(tp->t_mountp, dfp); } /* * Release a paused deferred work item so that it will be finished during the * next transaction roll. */ void xfs_defer_item_unpause( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED); dfp->dfp_flags &= ~XFS_DEFER_PAUSED; trace_xfs_defer_item_unpause(tp->t_mountp, dfp); }
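The design notes at the top of this file describe the intent/done pairing: every pending work item gets a log intent item when the transaction rolls, and a log done item once the work is finished, so that log recovery replays only the unfinished pieces. The toy program below models just that bookkeeping in userspace; it is illustrative only and shares no code or types with XFS (toy_intent, run_until() and recover() are invented names).

#include <stdbool.h>
#include <stdio.h>

#define NR_ITEMS 4

struct toy_intent {
	const char *what;
	bool intent_logged;	/* an intent item reached the log */
	bool done_logged;	/* the matching done item reached the log */
};

/* Log every intent, then "crash" after only crash_after items finished. */
static void run_until(struct toy_intent *log, int nr, int crash_after)
{
	int i;

	for (i = 0; i < nr; i++)
		log[i].intent_logged = true;
	for (i = 0; i < nr && i < crash_after; i++)
		log[i].done_logged = true;
}

/* Recovery: redo exactly the items with an intent but no done item. */
static void recover(const struct toy_intent *log, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		if (log[i].intent_logged && !log[i].done_logged)
			printf("replaying: %s\n", log[i].what);
}

int main(void)
{
	struct toy_intent log[NR_ITEMS] = {
		{ .what = "unmap old extent" },
		{ .what = "reduce refcount" },
		{ .what = "remove rmap" },
		{ .what = "map new extent" },
	};

	run_until(log, NR_ITEMS, 2);
	recover(log, NR_ITEMS);
	return 0;
}

Running it replays only "remove rmap" and "map new extent": the two items whose intents were logged but whose done items never made it to the log before the simulated crash, mirroring the recovery behaviour walked through in the t0..t5 example above.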
// SPDX-License-Identifier: GPL-2.0+ /* * NILFS segment buffer * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi.
* */ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/crc32.h> #include <linux/backing-dev.h> #include <linux/slab.h> #include "page.h" #include "segbuf.h" struct nilfs_write_info { struct the_nilfs *nilfs; struct bio *bio; int start, end; /* The region to be submitted */ int rest_blocks; int max_pages; int nr_vecs; sector_t blocknr; }; static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct the_nilfs *nilfs); static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) { struct nilfs_segment_buffer *segbuf; segbuf = kmem_cache_alloc(nilfs_segbuf_cachep, GFP_NOFS); if (unlikely(!segbuf)) return NULL; segbuf->sb_super = sb; INIT_LIST_HEAD(&segbuf->sb_list); INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); INIT_LIST_HEAD(&segbuf->sb_payload_buffers); segbuf->sb_super_root = NULL; init_completion(&segbuf->sb_bio_event); atomic_set(&segbuf->sb_err, 0); segbuf->sb_nbio = 0; return segbuf; } void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf) { kmem_cache_free(nilfs_segbuf_cachep, segbuf); } void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, unsigned long offset, struct the_nilfs *nilfs) { segbuf->sb_segnum = segnum; nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start, &segbuf->sb_fseg_end); segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset; segbuf->sb_rest_blocks = segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; } /** * nilfs_segbuf_map_cont - map a new log behind a given log * @segbuf: new segment buffer * @prev: segment buffer containing a log to be continued */ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, struct nilfs_segment_buffer *prev) { segbuf->sb_segnum = prev->sb_segnum; segbuf->sb_fseg_start = prev->sb_fseg_start; segbuf->sb_fseg_end = prev->sb_fseg_end; segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks; segbuf->sb_rest_blocks = segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; } void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, __u64 nextnum, struct the_nilfs *nilfs) { segbuf->sb_nextnum = nextnum; segbuf->sb_sum.next = nilfs_get_segment_start_blocknr(nilfs, nextnum); } int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) { struct buffer_head *bh; bh = sb_getblk(segbuf->sb_super, segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); if (!buffer_uptodate(bh)) { memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); } unlock_buffer(bh); nilfs_segbuf_add_segsum_buffer(segbuf, bh); return 0; } int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, struct buffer_head **bhp) { struct buffer_head *bh; bh = sb_getblk(segbuf->sb_super, segbuf->sb_pseg_start + segbuf->sb_sum.nblocks); if (unlikely(!bh)) return -ENOMEM; nilfs_segbuf_add_payload_buffer(segbuf, bh); *bhp = bh; return 0; } int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags, time64_t ctime, __u64 cno) { int err; segbuf->sb_sum.nblocks = segbuf->sb_sum.nsumblk = 0; err = nilfs_segbuf_extend_segsum(segbuf); if (unlikely(err)) return err; segbuf->sb_sum.flags = flags; segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; segbuf->sb_sum.ctime = ctime; segbuf->sb_sum.cno = cno; return 0; } /* * Setup segment summary */ void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf) { struct nilfs_segment_summary *raw_sum; struct buffer_head 
*bh_sum; bh_sum = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, b_assoc_buffers); raw_sum = (struct nilfs_segment_summary *)bh_sum->b_data; raw_sum->ss_magic = cpu_to_le32(NILFS_SEGSUM_MAGIC); raw_sum->ss_bytes = cpu_to_le16(sizeof(*raw_sum)); raw_sum->ss_flags = cpu_to_le16(segbuf->sb_sum.flags); raw_sum->ss_seq = cpu_to_le64(segbuf->sb_sum.seg_seq); raw_sum->ss_create = cpu_to_le64(segbuf->sb_sum.ctime); raw_sum->ss_next = cpu_to_le64(segbuf->sb_sum.next); raw_sum->ss_nblocks = cpu_to_le32(segbuf->sb_sum.nblocks); raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo); raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes); raw_sum->ss_pad = 0; raw_sum->ss_cno = cpu_to_le64(segbuf->sb_sum.cno); } /* * CRC calculation routines */ static void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; unsigned long size, bytes = segbuf->sb_sum.sumbytes; u32 crc; bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, b_assoc_buffers); raw_sum = (struct nilfs_segment_summary *)bh->b_data; size = min_t(unsigned long, bytes, bh->b_size); crc = crc32_le(seed, (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum), size - (sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum))); list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { bytes -= size; size = min_t(unsigned long, bytes, bh->b_size); crc = crc32_le(crc, bh->b_data, size); } raw_sum->ss_sumsum = cpu_to_le32(crc); } static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; void *kaddr; u32 crc; bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, b_assoc_buffers); raw_sum = (struct nilfs_segment_summary *)bh->b_data; crc = crc32_le(seed, (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum), bh->b_size - sizeof(raw_sum->ss_datasum)); list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { kaddr = kmap_local_page(bh->b_page); crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); kunmap_local(kaddr); } raw_sum->ss_datasum = cpu_to_le32(crc); } static void nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct nilfs_super_root *raw_sr; struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; unsigned int srsize; u32 crc; raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; srsize = NILFS_SR_BYTES(nilfs->ns_inode_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), srsize - sizeof(raw_sr->sr_sum)); raw_sr->sr_sum = cpu_to_le32(crc); } static void nilfs_release_buffers(struct list_head *list) { struct buffer_head *bh, *n; list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { list_del_init(&bh->b_assoc_buffers); brelse(bh); } } static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) { nilfs_release_buffers(&segbuf->sb_segsum_buffers); nilfs_release_buffers(&segbuf->sb_payload_buffers); segbuf->sb_super_root = NULL; } /* * Iterators for segment buffers */ void nilfs_clear_logs(struct list_head *logs) { struct nilfs_segment_buffer *segbuf; list_for_each_entry(segbuf, logs, sb_list) nilfs_segbuf_clear(segbuf); } void nilfs_truncate_logs(struct list_head *logs, struct nilfs_segment_buffer *last) { struct nilfs_segment_buffer *n, 
*segbuf; segbuf = list_prepare_entry(last, logs, sb_list); list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) { list_del_init(&segbuf->sb_list); nilfs_segbuf_clear(segbuf); nilfs_segbuf_free(segbuf); } } int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf; int ret = 0; list_for_each_entry(segbuf, logs, sb_list) { ret = nilfs_segbuf_write(segbuf, nilfs); if (ret) break; } return ret; } int nilfs_wait_on_logs(struct list_head *logs) { struct nilfs_segment_buffer *segbuf; int err, ret = 0; list_for_each_entry(segbuf, logs, sb_list) { err = nilfs_segbuf_wait(segbuf); if (err && !ret) ret = err; } return ret; } /** * nilfs_add_checksums_on_logs - add checksums on the logs * @logs: list of segment buffers storing target logs * @seed: checksum seed value */ void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; list_for_each_entry(segbuf, logs, sb_list) { if (segbuf->sb_super_root) nilfs_segbuf_fill_in_super_root_crc(segbuf, seed); nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); nilfs_segbuf_fill_in_data_crc(segbuf, seed); } } /* * BIO operations */ static void nilfs_end_bio_write(struct bio *bio) { struct nilfs_segment_buffer *segbuf = bio->bi_private; if (bio->bi_status) atomic_inc(&segbuf->sb_err); bio_put(bio); complete(&segbuf->sb_bio_event); } static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi) { struct bio *bio = wi->bio; bio->bi_end_io = nilfs_end_bio_write; bio->bi_private = segbuf; submit_bio(bio); segbuf->sb_nbio++; wi->bio = NULL; wi->rest_blocks -= wi->end - wi->start; wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end; return 0; } static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi) { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; wi->max_pages = BIO_MAX_VECS; wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; wi->blocknr = segbuf->sb_pseg_start; } static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi, struct buffer_head *bh) { int len, err; BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, REQ_OP_WRITE, GFP_NOIO); wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) << (wi->nilfs->ns_blocksize_bits - 9); } len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); if (len == bh->b_size) { wi->end++; return 0; } /* bio is FULL */ err = nilfs_segbuf_submit_bio(segbuf, wi); /* never submit current bh */ if (likely(!err)) goto repeat; return err; } /** * nilfs_segbuf_write - submit write requests of a log * @segbuf: buffer storing a log to be written * @nilfs: nilfs object * * Return Value: On Success, 0 is returned. On Error, one of the following * negative error code is returned. * * %-EIO - I/O error * * %-ENOMEM - Insufficient memory available. 
*/ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct the_nilfs *nilfs) { struct nilfs_write_info wi; struct buffer_head *bh; int res = 0; wi.nilfs = nilfs; nilfs_segbuf_prepare_write(segbuf, &wi); list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { res = nilfs_segbuf_submit_bh(segbuf, &wi, bh); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { res = nilfs_segbuf_submit_bh(segbuf, &wi, bh); if (unlikely(res)) goto failed_bio; } if (wi.bio) { /* * Last BIO is always sent through the following * submission. */ wi.bio->bi_opf |= REQ_SYNC; res = nilfs_segbuf_submit_bio(segbuf, &wi); } failed_bio: return res; } /** * nilfs_segbuf_wait - wait for completion of requested BIOs * @segbuf: segment buffer * * Return Value: On Success, 0 is returned. On Error, one of the following * negative error code is returned. * * %-EIO - I/O error */ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { int err = 0; if (!segbuf->sb_nbio) return 0; do { wait_for_completion(&segbuf->sb_bio_event); } while (--segbuf->sb_nbio > 0); if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { nilfs_err(segbuf->sb_super, "I/O error writing log (start-blocknr=%llu, block-count=%lu) in segment %llu", (unsigned long long)segbuf->sb_pseg_start, segbuf->sb_sum.nblocks, (unsigned long long)segbuf->sb_segnum); err = -EIO; } return err; }
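nilfs_segbuf_fill_in_segsum_crc() and nilfs_segbuf_fill_in_data_crc() above compute a seeded CRC32 over the log, deliberately starting just past the checksum fields so the stored checksum never covers itself. The fragment below sketches that pattern in plain C; toy_crc32_le() is a bitwise reflected CRC-32 (polynomial 0xEDB88320) in the spirit of the kernel's crc32_le(), and toy_segsum is an invented structure, so the result is not compatible with NILFS on-disk checksums.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise, reflected CRC-32 with an explicit seed. */
static uint32_t toy_crc32_le(uint32_t crc, const unsigned char *p, size_t len)
{
	size_t i;
	int k;

	for (i = 0; i < len; i++) {
		crc ^= p[i];
		for (k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0xedb88320u & (0u - (crc & 1)));
	}
	return crc;
}

/* A toy "segment summary": the checksum field itself is skipped. */
struct toy_segsum {
	uint32_t sumsum;		/* checksum over the rest of the block */
	uint32_t nblocks;
	uint32_t nfinfo;
	char payload[32];
};

int main(void)
{
	struct toy_segsum sum = { .nblocks = 8, .nfinfo = 2 };
	uint32_t seed = 0x12345678u;	/* per-filesystem seed, as in nilfs */

	strcpy(sum.payload, "segment payload bytes");

	/* Checksum everything after the sumsum field, then store the result. */
	sum.sumsum = toy_crc32_le(seed,
				  (const unsigned char *)&sum + sizeof(sum.sumsum),
				  sizeof(sum) - sizeof(sum.sumsum));

	printf("stored checksum: 0x%08x\n", sum.sumsum);
	return 0;
}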
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:ip type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Counters support */ /* 2 Comments support */ /* 3 Forceadd support */ /* 4 skbinfo support */ /* 5 bucketsize, initval support */ #define IPSET_TYPE_REV_MAX 6 /* bitmask support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ #define HTYPE hash_ip #define IP_SET_HASH_WITH_NETMASK #define IP_SET_HASH_WITH_BITMASK /* IPv4 variant */ /* Member elements */ struct hash_ip4_elem { /* Zero valued IP addresses cannot be stored */ __be32 ip; }; /* Common functions */ static bool hash_ip4_data_equal(const struct hash_ip4_elem *e1, const struct hash_ip4_elem *e2, u32 *multi) { return e1->ip == e2->ip; } static bool hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e) { next->ip = e->ip; } #define MTYPE hash_ip4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); ip &= h->bitmask.ip; if (ip == 0) return -EINVAL; e.ip = ip; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum
ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, hosts, i = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ip &= ntohl(h->bitmask.ip); e.ip = htonl(ip); if (e.ip == 0) return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST) return adtfn(set, &e, &ext, &ext, flags); ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) { if (ip_to == 0) return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; i++) { e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_ip4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ip += hosts; if (ip == 0) return 0; ret = 0; } return ret; } /* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; /* Common functions */ static bool hash_ip6_data_equal(const struct hash_ip6_elem *ip1, const struct hash_ip6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6); } static bool hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ip6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); nf_inet_addr_mask_inplace(&e.ip, &h->bitmask); if (ipv6_addr_any(&e.ip.in6)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; nf_inet_addr_mask_inplace(&e.ip, &h->bitmask); 
if (ipv6_addr_any(&e.ip.in6)) return -IPSET_ERR_HASH_ELEM; ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } static struct ip_set_type hash_ip_type __read_mostly = { .name = "hash:ip", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, [IPSET_ATTR_BITMASK] = { .type = NLA_NESTED }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ip_init(void) { return ip_set_type_register(&hash_ip_type); } static void __exit hash_ip_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ip_type); } module_init(hash_ip_init); module_exit(hash_ip_fini);
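In hash_ip4_uadt() above, an IP/CIDR pair is first expanded to a from..to range (ip_set_mask_from_to()), and the range is then walked in steps of 2^(32 - netmask) host addresses, so only one element per netmask-sized block is stored. The standalone sketch below reproduces that arithmetic; the /28 range and /30 netmask are arbitrary example values, and range_from_cidr() is a local helper, not an ipset API.

#include <stdint.h>
#include <stdio.h>

/* First/last host-order addresses covered by addr/cidr (cidr 1..32). */
static void range_from_cidr(uint32_t addr, int cidr,
			    uint32_t *from, uint32_t *to)
{
	uint32_t mask = cidr == 32 ? 0xffffffffu : ~(0xffffffffu >> cidr);

	*from = addr & mask;
	*to = *from | ~mask;
}

int main(void)
{
	int netmask = 30;		/* the set's "netmask" create option */
	uint32_t hosts = netmask == 32 ? 1 : 1u << (32 - netmask);
	uint32_t from, to, ip;

	range_from_cidr(0xc0a80146u, 28, &from, &to);	/* 192.168.1.70/28 */

	/* One stored element per netmask-sized (/30) block of the range. */
	for (ip = from; ip >= from && ip <= to; ip += hosts)
		printf("add %u.%u.%u.%u\n",
		       (unsigned int)(ip >> 24), (unsigned int)((ip >> 16) & 0xff),
		       (unsigned int)((ip >> 8) & 0xff), (unsigned int)(ip & 0xff));
	return 0;
}

Compiled and run, it prints four addresses (192.168.1.64, .68, .72, .76): one representative per /30 block of the /28 range, matching the `hosts` stride used by the module.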
/* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2021 Facebook */ #ifndef __MMAP_UNLOCK_WORK_H__ #define __MMAP_UNLOCK_WORK_H__ #include <linux/irq_work.h> /* irq_work to run mmap_read_unlock() in irq_work */ struct mmap_unlock_irq_work { struct irq_work irq_work; struct mm_struct *mm; }; DECLARE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); /* * We cannot do mmap_read_unlock() when the irq is disabled, because of * the risk of deadlock with rq_lock. To look up vma when the irqs are * disabled, we need to run mmap_read_unlock() in irq_work. We use a * percpu variable to do the irq_work. If the irq_work is already used * by another lookup, we fall back. */ static inline bool bpf_mmap_unlock_get_irq_work(struct mmap_unlock_irq_work **work_ptr) { struct mmap_unlock_irq_work *work = NULL; bool irq_work_busy = false; if (irqs_disabled()) { if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { work = this_cpu_ptr(&mmap_unlock_work); if (irq_work_is_busy(&work->irq_work)) { /* cannot queue more up_read, fallback */ irq_work_busy = true; } } else { /* * PREEMPT_RT does not allow trylocking the mmap sem in * interrupt disabled context. Force the fallback code. */ irq_work_busy = true; } } *work_ptr = work; return irq_work_busy; } static inline void bpf_mmap_unlock_mm(struct mmap_unlock_irq_work *work, struct mm_struct *mm) { if (!work) { mmap_read_unlock(mm); } else { work->mm = mm; /* The lock will be released once we're out of interrupt * context. Tell lockdep that we've released it now so * it doesn't complain that we forgot to release it. */ rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); irq_work_queue(&work->irq_work); } } #endif /* __MMAP_UNLOCK_WORK_H__ */
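The comment in mmap_unlock_work.h explains the pattern: when the current context must not release mmap_lock directly (IRQs disabled), the release is handed to irq_work and performed later from a safe context. The program below is a loose userspace analogue of that hand-off only, with no relation to the kernel APIs involved: a POSIX semaphore stands in for the lock (it may legally be posted from another thread) and a worker thread stands in for the irq_work callback. Build with -pthread.

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t resource;

/* Deferred release, playing the role of the irq_work callback. */
static void *release_worker(void *arg)
{
	(void)arg;
	printf("worker: releasing the held resource\n");
	sem_post(&resource);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	sem_init(&resource, 0, 1);
	sem_wait(&resource);		/* acquire; stands in for mmap_read_lock() */

	/*
	 * Pretend this context must not release the resource itself
	 * (the kernel case: IRQs disabled), so defer the release.
	 */
	pthread_create(&worker, NULL, release_worker, NULL);
	pthread_join(&worker, NULL);

	sem_wait(&resource);		/* succeeds only if the worker released it */
	sem_post(&resource);
	sem_destroy(&resource);
	return 0;
}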
998 798 800 999 7 1534 2 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 /* SPDX-License-Identifier: GPL-2.0 */ /* * This header is used to share core functionality between the * standalone connection tracking module, and the compatibility layer's use * of connection tracking. * * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> * - generalize L3 protocol dependent part. * * Derived from include/linux/netfiter_ipv4/ip_conntrack_core.h */ #ifndef _NF_CONNTRACK_CORE_H #define _NF_CONNTRACK_CORE_H #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_l4proto.h> /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state); int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); void nf_conntrack_proto_pernet_init(struct net *net); int nf_conntrack_proto_init(void); void nf_conntrack_proto_fini(void); int nf_conntrack_init_start(void); void nf_conntrack_cleanup_start(void); void nf_conntrack_init_end(void); void nf_conntrack_cleanup_end(void); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); int __nf_conntrack_confirm(struct sk_buff *skb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int nf_conntrack_confirm(struct sk_buff *skb) { struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; if (ct) { if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); if (ret == NF_ACCEPT) ct = (struct nf_conn *)skb_nfct(skb); } if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct)) nf_ct_deliver_cached_events(ct); } return ret; } unsigned int nf_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); #define CONNTRACK_LOCKS 1024 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) { if (timeout > INT_MAX) timeout = INT_MAX; if (nf_ct_is_confirmed(ct)) WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); else ct->timeout = (u32)timeout; } int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off); int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status); #endif /* _NF_CONNTRACK_CORE_H */
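/*
 * Editor's note: a small sketch of how the shared timeout helper above is
 * typically driven.  __nf_ct_set_timeout() takes a relative timeout in
 * jiffies (the ctnetlink caller converts from seconds); the wrapper name
 * sketch_ct_set_timeout_secs is illustrative, not an existing symbol.
 */
#include <linux/jiffies.h>

static inline void sketch_ct_set_timeout_secs(struct nf_conn *ct, u32 secs)
{
	/* For a confirmed entry this rearms ct->timeout relative to
	 * nfct_time_stamp; for an unconfirmed one it stores the delta.
	 * The helper itself clamps the value to INT_MAX. */
	__nf_ct_set_timeout(ct, (u64)secs * HZ);
}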
20 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2011 Instituto Nokia de Tecnologia * * Authors: * Lauro Ramos Venancio <lauro.venancio@openbossa.org> * Aloisio Almeida Jr <aloisio.almeida@openbossa.org> */ #ifndef __LOCAL_NFC_H #define __LOCAL_NFC_H #include <net/nfc/nfc.h> #include <net/sock.h> #define NFC_TARGET_MODE_IDLE 0 #define NFC_TARGET_MODE_SLEEP 1 struct nfc_protocol { int id; struct proto *proto; struct module *owner; int (*create)(struct net *net, struct socket *sock, const struct nfc_protocol *nfc_proto, int kern); }; struct nfc_rawsock { struct sock sk; struct nfc_dev *dev; u32 target_idx; struct work_struct tx_work; bool tx_work_scheduled; }; struct nfc_sock_list { struct hlist_head head; rwlock_t lock; }; #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk) #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) struct nfc_llcp_sdp_tlv; void nfc_llcp_mac_is_down(struct nfc_dev *dev); void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int nfc_llcp_local_put(struct nfc_llcp_local *local); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head); int __init rawsock_init(void); void rawsock_exit(void); int __init af_nfc_init(void); void af_nfc_exit(void); int nfc_proto_register(const struct nfc_protocol *nfc_proto); void nfc_proto_unregister(const struct nfc_protocol *nfc_proto); extern int nfc_devlist_generation; extern struct mutex nfc_devlist_mutex; int __init nfc_genl_init(void); void nfc_genl_exit(void); void nfc_genl_data_init(struct nfc_genl_data *genl_data); void nfc_genl_data_exit(struct nfc_genl_data *genl_data); int nfc_genl_targets_found(struct nfc_dev *dev); int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx); int nfc_genl_device_added(struct nfc_dev *dev); int nfc_genl_device_removed(struct nfc_dev *dev); int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_genl_dep_link_down_event(struct nfc_dev *dev); int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); int nfc_genl_tm_deactivated(struct nfc_dev *dev); int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx); struct nfc_dev 
*nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) { put_device(&dev->dev); } static inline void nfc_device_iter_init(struct class_dev_iter *iter) { class_dev_iter_init(iter, &nfc_class, NULL, NULL); } static inline struct nfc_dev *nfc_device_iter_next(struct class_dev_iter *iter) { struct device *d = class_dev_iter_next(iter); if (!d) return NULL; return to_nfc_dev(d); } static inline void nfc_device_iter_exit(struct class_dev_iter *iter) { class_dev_iter_exit(iter); } int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result); int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols); int nfc_stop_poll(struct nfc_dev *dev); int nfc_dep_link_up(struct nfc_dev *dev, int target_idx, u8 comm_mode); int nfc_dep_link_down(struct nfc_dev *dev); int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol); int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx); int nfc_disable_se(struct nfc_dev *dev, u32 se_idx); #endif /* __LOCAL_NFC_H */
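/*
 * Editor's note: a brief sketch of the device-iterator inlines declared
 * above, in the way a dump path would walk every registered NFC device.
 * The function sketch_count_nfc_devices is illustrative only.
 */
#include <linux/device.h>

static int sketch_count_nfc_devices(void)
{
	struct class_dev_iter iter;
	struct nfc_dev *dev;
	int count = 0;

	nfc_device_iter_init(&iter);
	while ((dev = nfc_device_iter_next(&iter)) != NULL) {
		/* The class iterator keeps the returned device alive until
		 * the next call, so no explicit nfc_put_device() is needed
		 * inside the loop. */
		count++;
	}
	nfc_device_iter_exit(&iter);

	return count;
}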
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:net,iface type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 nomatch flag support added */ /* 2 /0 support added */ /* 3 Counters support added */ /* 4 Comments support added */ /* 5 Forceadd support added */ /* 6 skbinfo support added */ /* 7 interface wildcard support added */ #define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Type specific function prefix */ #define HTYPE hash_netiface #define IP_SET_HASH_WITH_NETS #define IP_SET_HASH_WITH_MULTI #define IP_SET_HASH_WITH_NET0 #define STRSCPY(a, b) strscpy(a, b, IFNAMSIZ) /* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; }; /* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; u8 wildcard; char 
iface[IFNAMSIZ]; }; /* Common functions */ static bool hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, const struct hash_netiface4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && (ip1->wildcard ? strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : strcmp(ip1->iface, ip2->iface) == 0); } static int hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) { elem->ip &= ip_set_netmask(cidr); elem->cidr = cidr; } static bool hash_netiface4_data_list(struct sk_buff *skb, const struct hash_netiface4_elem *data) { u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_netiface4_data_next(struct hash_netiface4_elem *next, const struct hash_netiface4_elem *d) { next->ip = d->ip; } #define MTYPE hash_netiface4 #define HOST_MASK 32 #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static const char *get_physindev_name(const struct sk_buff *skb, struct net *net) { struct net_device *dev = nf_bridge_get_physindev(skb, net); return dev ? dev->name : NULL; } static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); return dev ? dev->name : NULL; } #endif static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->state->dir ? par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) : get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { STRSCPY(e.iface, SRCDIR ? 
IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) e.wildcard = 1; } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip_to < ip) swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } if (retried) ip = ntohl(h->next.ip); do { i++; e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_netiface4_data_next(&h->next, &e); return -ERANGE; } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } while (ip++ < ip_to); return ret; } /* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; }; struct hash_netiface6_elem { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; u8 wildcard; char iface[IFNAMSIZ]; }; /* Common functions */ static bool hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && (ip1->wildcard ? strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : strcmp(ip1->iface, ip2->iface) == 0); } static int hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr) { ip6_netmask(&elem->ip, cidr); elem->cidr = cidr; } static bool hash_netiface6_data_list(struct sk_buff *skb, const struct hash_netiface6_elem *data) { u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | (data->wildcard ? 
IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_netiface6_data_next(struct hash_netiface6_elem *next, const struct hash_netiface6_elem *d) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_netiface6 #define HOST_MASK 128 #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct hash_netiface6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6_netmask(&e.ip, e.cidr); if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) : get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ip6_netmask(&e.ip, e.cidr); nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) e.wildcard = 1; } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 
0 : ret; } static struct ip_set_type hash_netiface_type __read_mostly = { .name = "hash:net,iface", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_netiface_init(void) { return ip_set_type_register(&hash_netiface_type); } static void __exit hash_netiface_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_netiface_type); } module_init(hash_netiface_init); module_exit(hash_netiface_fini);
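/*
 * Editor's note: a standalone sketch of the interface comparison rule
 * shared by hash_netiface4_data_equal()/hash_netiface6_data_equal()
 * above.  With the wildcard flag set, the stored name acts as a prefix
 * ("eth" matches eth0, eth1, ...); otherwise the names must match
 * exactly.  sketch_iface_match is an illustrative helper, not an
 * existing symbol.
 */
#include <linux/string.h>
#include <linux/types.h>

static bool sketch_iface_match(const char *stored, const char *pkt_iface,
			       bool wildcard)
{
	if (wildcard)
		return strncmp(stored, pkt_iface, strlen(stored)) == 0;
	return strcmp(stored, pkt_iface) == 0;
}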
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/configfs.h> #include "tcp.h" #include "nodemanager.h" #include "heartbeat.h" #include "masklog.h" #include "sys.h" /* for now we operate under the assertion that there can be only one * cluster active at a time. Changing this will require trickling * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; static const char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { "reset", /* O2NM_FENCE_RESET */ "panic", /* O2NM_FENCE_PANIC */ }; static inline void o2nm_lock_subsystem(void); static inline void o2nm_unlock_subsystem(void); struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { struct o2nm_node *node = NULL; if (node_num >= O2NM_MAX_NODES || o2nm_single_cluster == NULL) goto out; read_lock(&o2nm_single_cluster->cl_nodes_lock); node = o2nm_single_cluster->cl_nodes[node_num]; if (node) config_item_get(&node->nd_item); read_unlock(&o2nm_single_cluster->cl_nodes_lock); out: return node; } EXPORT_SYMBOL_GPL(o2nm_get_node_by_num); int o2nm_configured_node_map(unsigned long *map, unsigned bytes) { struct o2nm_cluster *cluster = o2nm_single_cluster; BUG_ON(bytes < (sizeof(cluster->cl_nodes_bitmap))); if (cluster == NULL) return -EINVAL; read_lock(&cluster->cl_nodes_lock); bitmap_copy(map, cluster->cl_nodes_bitmap, O2NM_MAX_NODES); read_unlock(&cluster->cl_nodes_lock); return 0; } EXPORT_SYMBOL_GPL(o2nm_configured_node_map); static struct o2nm_node *o2nm_node_ip_tree_lookup(struct o2nm_cluster *cluster, __be32 ip_needle, struct rb_node ***ret_p, struct rb_node **ret_parent) { struct rb_node **p = &cluster->cl_node_ip_tree.rb_node; struct rb_node *parent = NULL; struct o2nm_node *node, *ret = NULL; while (*p) { int cmp; parent = *p; node = rb_entry(parent, struct o2nm_node, nd_ip_node); cmp = memcmp(&ip_needle, &node->nd_ipv4_address, sizeof(ip_needle)); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) p = &(*p)->rb_right; else { ret = node; break; } } if (ret_p != NULL) *ret_p = p; if (ret_parent != NULL) *ret_parent = parent; return ret; } struct o2nm_node *o2nm_get_node_by_ip(__be32 addr) { struct o2nm_node *node = NULL; struct o2nm_cluster *cluster = o2nm_single_cluster; if (cluster == NULL) goto out; read_lock(&cluster->cl_nodes_lock); node = o2nm_node_ip_tree_lookup(cluster, addr, NULL, NULL); if (node) config_item_get(&node->nd_item); read_unlock(&cluster->cl_nodes_lock); out: return node; } EXPORT_SYMBOL_GPL(o2nm_get_node_by_ip); void o2nm_node_put(struct o2nm_node *node) { config_item_put(&node->nd_item); } EXPORT_SYMBOL_GPL(o2nm_node_put); void o2nm_node_get(struct o2nm_node *node) { config_item_get(&node->nd_item); } EXPORT_SYMBOL_GPL(o2nm_node_get); u8 o2nm_this_node(void) { u8 node_num = O2NM_MAX_NODES; if (o2nm_single_cluster && o2nm_single_cluster->cl_has_local) node_num = o2nm_single_cluster->cl_local_node; return node_num; } EXPORT_SYMBOL_GPL(o2nm_this_node); /* node configfs bits */ static struct o2nm_cluster *to_o2nm_cluster(struct config_item *item) { return item ? container_of(to_config_group(item), struct o2nm_cluster, cl_group) : NULL; } static struct o2nm_node *to_o2nm_node(struct config_item *item) { return item ? 
container_of(item, struct o2nm_node, nd_item) : NULL; } static void o2nm_node_release(struct config_item *item) { struct o2nm_node *node = to_o2nm_node(item); kfree(node); } static ssize_t o2nm_node_num_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_o2nm_node(item)->nd_num); } static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node) { /* through the first node_set .parent * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */ if (node->nd_item.ci_parent) return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent); else return NULL; } enum { O2NM_NODE_ATTR_NUM = 0, O2NM_NODE_ATTR_PORT, O2NM_NODE_ATTR_ADDRESS, }; static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster; unsigned long tmp; char *p = (char *)page; int ret = 0; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; if (tmp >= O2NM_MAX_NODES) return -ERANGE; /* once we're in the cl_nodes tree networking can look us up by * node number and try to use our address and port attributes * to connect to this node.. make sure that they've been set * before writing the node attribute? */ if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) || !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EINVAL; /* XXX */ o2nm_lock_subsystem(); cluster = to_o2nm_cluster_from_node(node); if (!cluster) { o2nm_unlock_subsystem(); return -EINVAL; } write_lock(&cluster->cl_nodes_lock); if (cluster->cl_nodes[tmp]) ret = -EEXIST; else if (test_and_set_bit(O2NM_NODE_ATTR_NUM, &node->nd_set_attributes)) ret = -EBUSY; else { cluster->cl_nodes[tmp] = node; node->nd_num = tmp; set_bit(tmp, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); o2nm_unlock_subsystem(); if (ret) return ret; return count; } static ssize_t o2nm_node_ipv4_port_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", ntohs(to_o2nm_node(item)->nd_ipv4_port)); } static ssize_t o2nm_node_ipv4_port_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); unsigned long tmp; char *p = (char *)page; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; if (tmp == 0) return -EINVAL; if (tmp >= (u16)-1) return -ERANGE; if (test_and_set_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EBUSY; node->nd_ipv4_port = htons(tmp); return count; } static ssize_t o2nm_node_ipv4_address_show(struct config_item *item, char *page) { return sprintf(page, "%pI4\n", &to_o2nm_node(item)->nd_ipv4_address); } static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster; int ret, i; struct rb_node **p, *parent; unsigned int octets[4]; __be32 ipv4_addr = 0; ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[3], &octets[2], &octets[1], &octets[0]); if (ret != 4) return -EINVAL; for (i = 0; i < ARRAY_SIZE(octets); i++) { if (octets[i] > 255) return -ERANGE; be32_add_cpu(&ipv4_addr, octets[i] << (i * 8)); } o2nm_lock_subsystem(); cluster = to_o2nm_cluster_from_node(node); if (!cluster) { o2nm_unlock_subsystem(); return -EINVAL; } ret = 0; write_lock(&cluster->cl_nodes_lock); if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) ret = -EEXIST; else if (test_and_set_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes)) ret = -EBUSY; else { 
rb_link_node(&node->nd_ip_node, parent, p); rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); } write_unlock(&cluster->cl_nodes_lock); o2nm_unlock_subsystem(); if (ret) return ret; memcpy(&node->nd_ipv4_address, &ipv4_addr, sizeof(ipv4_addr)); return count; } static ssize_t o2nm_node_local_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_o2nm_node(item)->nd_local); } static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster; unsigned long tmp; char *p = (char *)page; ssize_t ret; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; tmp = !!tmp; /* boolean of whether this node wants to be local */ /* setting local turns on networking rx for now so we require having * set everything else first */ if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) || !test_bit(O2NM_NODE_ATTR_NUM, &node->nd_set_attributes) || !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EINVAL; /* XXX */ o2nm_lock_subsystem(); cluster = to_o2nm_cluster_from_node(node); if (!cluster) { ret = -EINVAL; goto out; } /* the only failure case is trying to set a new local node * when a different one is already set */ if (tmp && tmp == cluster->cl_has_local && cluster->cl_local_node != node->nd_num) { ret = -EBUSY; goto out; } /* bring up the rx thread if we're setting the new local node. */ if (tmp && !cluster->cl_has_local) { ret = o2net_start_listening(node); if (ret) goto out; } if (!tmp && cluster->cl_has_local && cluster->cl_local_node == node->nd_num) { o2net_stop_listening(node); cluster->cl_local_node = O2NM_INVALID_NODE_NUM; } node->nd_local = tmp; if (node->nd_local) { cluster->cl_has_local = tmp; cluster->cl_local_node = node->nd_num; } ret = count; out: o2nm_unlock_subsystem(); return ret; } CONFIGFS_ATTR(o2nm_node_, num); CONFIGFS_ATTR(o2nm_node_, ipv4_port); CONFIGFS_ATTR(o2nm_node_, ipv4_address); CONFIGFS_ATTR(o2nm_node_, local); static struct configfs_attribute *o2nm_node_attrs[] = { &o2nm_node_attr_num, &o2nm_node_attr_ipv4_port, &o2nm_node_attr_ipv4_address, &o2nm_node_attr_local, NULL, }; static struct configfs_item_operations o2nm_node_item_ops = { .release = o2nm_node_release, }; static const struct config_item_type o2nm_node_type = { .ct_item_ops = &o2nm_node_item_ops, .ct_attrs = o2nm_node_attrs, .ct_owner = THIS_MODULE, }; /* node set */ struct o2nm_node_group { struct config_group ns_group; /* some stuff? */ }; #if 0 static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group) { return group ? 
container_of(group, struct o2nm_node_group, ns_group) : NULL; } #endif static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, unsigned int *val) { unsigned long tmp; char *p = (char *)page; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; if (tmp == 0) return -EINVAL; if (tmp >= (u32)-1) return -ERANGE; *val = tmp; return count; } static ssize_t o2nm_cluster_idle_timeout_ms_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_idle_timeout_ms); } static ssize_t o2nm_cluster_idle_timeout_ms_store(struct config_item *item, const char *page, size_t count) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { if (cluster->cl_idle_timeout_ms != val && o2net_num_connected_peers()) { mlog(ML_NOTICE, "o2net: cannot change idle timeout after " "the first peer has agreed to it." " %d connected peers\n", o2net_num_connected_peers()); ret = -EINVAL; } else if (val <= cluster->cl_keepalive_delay_ms) { mlog(ML_NOTICE, "o2net: idle timeout must be larger " "than keepalive delay\n"); ret = -EINVAL; } else { cluster->cl_idle_timeout_ms = val; } } return ret; } static ssize_t o2nm_cluster_keepalive_delay_ms_show( struct config_item *item, char *page) { return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_keepalive_delay_ms); } static ssize_t o2nm_cluster_keepalive_delay_ms_store( struct config_item *item, const char *page, size_t count) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { if (cluster->cl_keepalive_delay_ms != val && o2net_num_connected_peers()) { mlog(ML_NOTICE, "o2net: cannot change keepalive delay after" " the first peer has agreed to it." 
" %d connected peers\n", o2net_num_connected_peers()); ret = -EINVAL; } else if (val >= cluster->cl_idle_timeout_ms) { mlog(ML_NOTICE, "o2net: keepalive delay must be " "smaller than idle timeout\n"); ret = -EINVAL; } else { cluster->cl_keepalive_delay_ms = val; } } return ret; } static ssize_t o2nm_cluster_reconnect_delay_ms_show( struct config_item *item, char *page) { return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_reconnect_delay_ms); } static ssize_t o2nm_cluster_reconnect_delay_ms_store( struct config_item *item, const char *page, size_t count) { return o2nm_cluster_attr_write(page, count, &to_o2nm_cluster(item)->cl_reconnect_delay_ms); } static ssize_t o2nm_cluster_fence_method_show( struct config_item *item, char *page) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret = 0; if (cluster) ret = sprintf(page, "%s\n", o2nm_fence_method_desc[cluster->cl_fence_method]); return ret; } static ssize_t o2nm_cluster_fence_method_store( struct config_item *item, const char *page, size_t count) { unsigned int i; if (page[count - 1] != '\n') goto bail; for (i = 0; i < O2NM_FENCE_METHODS; ++i) { if (count != strlen(o2nm_fence_method_desc[i]) + 1) continue; if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) continue; if (to_o2nm_cluster(item)->cl_fence_method != i) { printk(KERN_INFO "ocfs2: Changing fence method to %s\n", o2nm_fence_method_desc[i]); to_o2nm_cluster(item)->cl_fence_method = i; } return count; } bail: return -EINVAL; } CONFIGFS_ATTR(o2nm_cluster_, idle_timeout_ms); CONFIGFS_ATTR(o2nm_cluster_, keepalive_delay_ms); CONFIGFS_ATTR(o2nm_cluster_, reconnect_delay_ms); CONFIGFS_ATTR(o2nm_cluster_, fence_method); static struct configfs_attribute *o2nm_cluster_attrs[] = { &o2nm_cluster_attr_idle_timeout_ms, &o2nm_cluster_attr_keepalive_delay_ms, &o2nm_cluster_attr_reconnect_delay_ms, &o2nm_cluster_attr_fence_method, NULL, }; static struct config_item *o2nm_node_group_make_item(struct config_group *group, const char *name) { struct o2nm_node *node = NULL; if (strlen(name) > O2NM_MAX_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL); if (node == NULL) return ERR_PTR(-ENOMEM); strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */ config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); spin_lock_init(&node->nd_lock); mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); return &node->nd_item; } static void o2nm_node_group_drop_item(struct config_group *group, struct config_item *item) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster(group->cg_item.ci_parent); if (cluster->cl_nodes[node->nd_num] == node) { o2net_disconnect_node(node); if (cluster->cl_has_local && (cluster->cl_local_node == node->nd_num)) { cluster->cl_has_local = 0; cluster->cl_local_node = O2NM_INVALID_NODE_NUM; o2net_stop_listening(node); } } /* XXX call into net to stop this node from trading messages */ write_lock(&cluster->cl_nodes_lock); /* XXX sloppy */ if (node->nd_ipv4_address) rb_erase(&node->nd_ip_node, &cluster->cl_node_ip_tree); /* nd_num might be 0 if the node number hasn't been set.. 
*/ if (cluster->cl_nodes[node->nd_num] == node) { cluster->cl_nodes[node->nd_num] = NULL; clear_bit(node->nd_num, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", config_item_name(&node->nd_item)); config_item_put(item); } static struct configfs_group_operations o2nm_node_group_group_ops = { .make_item = o2nm_node_group_make_item, .drop_item = o2nm_node_group_drop_item, }; static const struct config_item_type o2nm_node_group_type = { .ct_group_ops = &o2nm_node_group_group_ops, .ct_owner = THIS_MODULE, }; /* cluster */ static void o2nm_cluster_release(struct config_item *item) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); kfree(cluster); } static struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, }; static const struct config_item_type o2nm_cluster_type = { .ct_item_ops = &o2nm_cluster_item_ops, .ct_attrs = o2nm_cluster_attrs, .ct_owner = THIS_MODULE, }; /* cluster set */ struct o2nm_cluster_group { struct configfs_subsystem cs_subsys; /* some stuff? */ }; #if 0 static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *group) { return group ? container_of(to_configfs_subsystem(group), struct o2nm_cluster_group, cs_subsys) : NULL; } #endif static struct config_group *o2nm_cluster_group_make_group(struct config_group *group, const char *name) { struct o2nm_cluster *cluster = NULL; struct o2nm_node_group *ns = NULL; struct config_group *o2hb_group = NULL, *ret = NULL; /* this runs under the parent dir's i_rwsem; there can be only * one caller in here at a time */ if (o2nm_single_cluster) return ERR_PTR(-ENOSPC); cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); o2hb_group = o2hb_alloc_hb_set(); if (cluster == NULL || ns == NULL || o2hb_group == NULL) goto out; config_group_init_type_name(&cluster->cl_group, name, &o2nm_cluster_type); configfs_add_default_group(&ns->ns_group, &cluster->cl_group); config_group_init_type_name(&ns->ns_group, "node", &o2nm_node_group_type); configfs_add_default_group(o2hb_group, &cluster->cl_group); rwlock_init(&cluster->cl_nodes_lock); cluster->cl_node_ip_tree = RB_ROOT; cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; cluster->cl_fence_method = O2NM_FENCE_RESET; ret = &cluster->cl_group; o2nm_single_cluster = cluster; out: if (ret == NULL) { kfree(cluster); kfree(ns); o2hb_free_hb_set(o2hb_group); ret = ERR_PTR(-ENOMEM); } return ret; } static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); BUG_ON(o2nm_single_cluster != cluster); o2nm_single_cluster = NULL; configfs_remove_default_groups(&cluster->cl_group); config_item_put(item); } static struct configfs_group_operations o2nm_cluster_group_group_ops = { .make_group = o2nm_cluster_group_make_group, .drop_item = o2nm_cluster_group_drop_item, }; static const struct config_item_type o2nm_cluster_group_type = { .ct_group_ops = &o2nm_cluster_group_group_ops, .ct_owner = THIS_MODULE, }; static struct o2nm_cluster_group o2nm_cluster_group = { .cs_subsys = { .su_group = { .cg_item = { .ci_namebuf = "cluster", .ci_type = &o2nm_cluster_group_type, }, }, }, }; static inline void o2nm_lock_subsystem(void) { mutex_lock(&o2nm_cluster_group.cs_subsys.su_mutex); } static inline void 
o2nm_unlock_subsystem(void) { mutex_unlock(&o2nm_cluster_group.cs_subsys.su_mutex); } int o2nm_depend_item(struct config_item *item) { return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); } void o2nm_undepend_item(struct config_item *item) { configfs_undepend_item(item); } int o2nm_depend_this_node(void) { int ret = 0; struct o2nm_node *local_node; local_node = o2nm_get_node_by_num(o2nm_this_node()); if (!local_node) { ret = -EINVAL; goto out; } ret = o2nm_depend_item(&local_node->nd_item); o2nm_node_put(local_node); out: return ret; } void o2nm_undepend_this_node(void) { struct o2nm_node *local_node; local_node = o2nm_get_node_by_num(o2nm_this_node()); BUG_ON(!local_node); o2nm_undepend_item(&local_node->nd_item); o2nm_node_put(local_node); } static void __exit exit_o2nm(void) { /* XXX sync with hb callbacks and shut down hb? */ o2net_unregister_hb_callbacks(); configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); o2cb_sys_shutdown(); o2net_exit(); o2hb_exit(); } static int __init init_o2nm(void) { int ret; o2hb_init(); ret = o2net_init(); if (ret) goto out_o2hb; ret = o2net_register_hb_callbacks(); if (ret) goto out_o2net; config_group_init(&o2nm_cluster_group.cs_subsys.su_group); mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); if (ret) { printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); goto out_callbacks; } ret = o2cb_sys_init(); if (!ret) goto out; configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); out_callbacks: o2net_unregister_hb_callbacks(); out_o2net: o2net_exit(); out_o2hb: o2hb_exit(); out: return ret; } MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("OCFS2 cluster management"); module_init(init_o2nm) module_exit(exit_o2nm)
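/*
 * Editor's note: a short sketch of the reference-counting contract for
 * the node lookup helpers exported above: o2nm_get_node_by_num() returns
 * a node with an extra config_item reference (or NULL), which the caller
 * must drop with o2nm_node_put().  sketch_node_port is illustrative and
 * assumes the subsystem's nodemanager.h is available.
 */
#include <asm/byteorder.h>
#include "nodemanager.h"

static u16 sketch_node_port(u8 node_num)
{
	struct o2nm_node *node;
	u16 port;

	node = o2nm_get_node_by_num(node_num);
	if (!node)
		return 0;

	port = ntohs(node->nd_ipv4_port);

	/* Drop the reference taken by the lookup. */
	o2nm_node_put(node);
	return port;
}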
// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2003-2008 Takahiro Hirofuchi * Copyright (C) 2015-2016 Samsung Electronics * Krzysztof Opasiak <k.opasiak@samsung.com> */ #include <asm/byteorder.h> #include
<linux/file.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <net/sock.h> #include "usbip_common.h" #define DRIVER_AUTHOR "Takahiro Hirofuchi <hirofuchi@users.sourceforge.net>" #define DRIVER_DESC "USB/IP Core" #ifdef CONFIG_USBIP_DEBUG unsigned long usbip_debug_flag = 0xffffffff; #else unsigned long usbip_debug_flag; #endif EXPORT_SYMBOL_GPL(usbip_debug_flag); module_param(usbip_debug_flag, ulong, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(usbip_debug_flag, "debug flags (defined in usbip_common.h)"); /* FIXME */ struct device_attribute dev_attr_usbip_debug; EXPORT_SYMBOL_GPL(dev_attr_usbip_debug); static ssize_t usbip_debug_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", usbip_debug_flag); } static ssize_t usbip_debug_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { if (sscanf(buf, "%lx", &usbip_debug_flag) != 1) return -EINVAL; return count; } DEVICE_ATTR_RW(usbip_debug); static void usbip_dump_buffer(char *buff, int bufflen) { print_hex_dump(KERN_DEBUG, "usbip-core", DUMP_PREFIX_OFFSET, 16, 4, buff, bufflen, false); } static void usbip_dump_pipe(unsigned int p) { unsigned char type = usb_pipetype(p); unsigned char ep = usb_pipeendpoint(p); unsigned char dev = usb_pipedevice(p); unsigned char dir = usb_pipein(p); pr_debug("dev(%d) ep(%d) [%s] ", dev, ep, dir ? "IN" : "OUT"); switch (type) { case PIPE_ISOCHRONOUS: pr_debug("ISO\n"); break; case PIPE_INTERRUPT: pr_debug("INT\n"); break; case PIPE_CONTROL: pr_debug("CTRL\n"); break; case PIPE_BULK: pr_debug("BULK\n"); break; default: pr_debug("ERR\n"); break; } } static void usbip_dump_usb_device(struct usb_device *udev) { struct device *dev = &udev->dev; int i; dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", udev->devnum, udev->devpath, usb_speed_string(udev->speed)); pr_debug("tt hub ttport %d\n", udev->ttport); dev_dbg(dev, " "); for (i = 0; i < 16; i++) pr_debug(" %2u", i); pr_debug("\n"); dev_dbg(dev, " toggle0(IN) :"); for (i = 0; i < 16; i++) pr_debug(" %2u", (udev->toggle[0] & (1 << i)) ? 1 : 0); pr_debug("\n"); dev_dbg(dev, " toggle1(OUT):"); for (i = 0; i < 16; i++) pr_debug(" %2u", (udev->toggle[1] & (1 << i)) ? 
1 : 0); pr_debug("\n"); dev_dbg(dev, " epmaxp_in :"); for (i = 0; i < 16; i++) { if (udev->ep_in[i]) pr_debug(" %2u", le16_to_cpu(udev->ep_in[i]->desc.wMaxPacketSize)); } pr_debug("\n"); dev_dbg(dev, " epmaxp_out :"); for (i = 0; i < 16; i++) { if (udev->ep_out[i]) pr_debug(" %2u", le16_to_cpu(udev->ep_out[i]->desc.wMaxPacketSize)); } pr_debug("\n"); dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), udev->bus->bus_name); dev_dbg(dev, "have_langid %d, string_langid %d\n", udev->have_langid, udev->string_langid); dev_dbg(dev, "maxchild %d\n", udev->maxchild); } static void usbip_dump_request_type(__u8 rt) { switch (rt & USB_RECIP_MASK) { case USB_RECIP_DEVICE: pr_debug("DEVICE"); break; case USB_RECIP_INTERFACE: pr_debug("INTERF"); break; case USB_RECIP_ENDPOINT: pr_debug("ENDPOI"); break; case USB_RECIP_OTHER: pr_debug("OTHER "); break; default: pr_debug("------"); break; } } static void usbip_dump_usb_ctrlrequest(struct usb_ctrlrequest *cmd) { if (!cmd) { pr_debug(" : null pointer\n"); return; } pr_debug(" "); pr_debug("bRequestType(%02X) bRequest(%02X) wValue(%04X) wIndex(%04X) wLength(%04X) ", cmd->bRequestType, cmd->bRequest, cmd->wValue, cmd->wIndex, cmd->wLength); pr_debug("\n "); if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD) { pr_debug("STANDARD "); switch (cmd->bRequest) { case USB_REQ_GET_STATUS: pr_debug("GET_STATUS\n"); break; case USB_REQ_CLEAR_FEATURE: pr_debug("CLEAR_FEAT\n"); break; case USB_REQ_SET_FEATURE: pr_debug("SET_FEAT\n"); break; case USB_REQ_SET_ADDRESS: pr_debug("SET_ADDRRS\n"); break; case USB_REQ_GET_DESCRIPTOR: pr_debug("GET_DESCRI\n"); break; case USB_REQ_SET_DESCRIPTOR: pr_debug("SET_DESCRI\n"); break; case USB_REQ_GET_CONFIGURATION: pr_debug("GET_CONFIG\n"); break; case USB_REQ_SET_CONFIGURATION: pr_debug("SET_CONFIG\n"); break; case USB_REQ_GET_INTERFACE: pr_debug("GET_INTERF\n"); break; case USB_REQ_SET_INTERFACE: pr_debug("SET_INTERF\n"); break; case USB_REQ_SYNCH_FRAME: pr_debug("SYNC_FRAME\n"); break; default: pr_debug("REQ(%02X)\n", cmd->bRequest); break; } usbip_dump_request_type(cmd->bRequestType); } else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) { pr_debug("CLASS\n"); } else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) { pr_debug("VENDOR\n"); } else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_RESERVED) { pr_debug("RESERVED\n"); } } void usbip_dump_urb(struct urb *urb) { struct device *dev; if (!urb) { pr_debug("urb: null pointer!!\n"); return; } if (!urb->dev) { pr_debug("urb->dev: null pointer!!\n"); return; } dev = &urb->dev->dev; usbip_dump_usb_device(urb->dev); dev_dbg(dev, " pipe :%08x ", urb->pipe); usbip_dump_pipe(urb->pipe); dev_dbg(dev, " status :%d\n", urb->status); dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); dev_dbg(dev, " transfer_buffer_length:%d\n", urb->transfer_buffer_length); dev_dbg(dev, " actual_length :%d\n", urb->actual_length); if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) usbip_dump_usb_ctrlrequest( (struct usb_ctrlrequest *)urb->setup_packet); dev_dbg(dev, " start_frame :%d\n", urb->start_frame); dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); dev_dbg(dev, " interval :%d\n", urb->interval); dev_dbg(dev, " error_count :%d\n", urb->error_count); } EXPORT_SYMBOL_GPL(usbip_dump_urb); void usbip_dump_header(struct usbip_header *pdu) { pr_debug("BASE: cmd %u seq %u devid %u dir %u ep %u\n", pdu->base.command, pdu->base.seqnum, pdu->base.devid, pdu->base.direction, pdu->base.ep); switch (pdu->base.command) { 
case USBIP_CMD_SUBMIT: pr_debug("USBIP_CMD_SUBMIT: x_flags %u x_len %u sf %u #p %d iv %d\n", pdu->u.cmd_submit.transfer_flags, pdu->u.cmd_submit.transfer_buffer_length, pdu->u.cmd_submit.start_frame, pdu->u.cmd_submit.number_of_packets, pdu->u.cmd_submit.interval); break; case USBIP_CMD_UNLINK: pr_debug("USBIP_CMD_UNLINK: seq %u\n", pdu->u.cmd_unlink.seqnum); break; case USBIP_RET_SUBMIT: pr_debug("USBIP_RET_SUBMIT: st %d al %u sf %d #p %d ec %d\n", pdu->u.ret_submit.status, pdu->u.ret_submit.actual_length, pdu->u.ret_submit.start_frame, pdu->u.ret_submit.number_of_packets, pdu->u.ret_submit.error_count); break; case USBIP_RET_UNLINK: pr_debug("USBIP_RET_UNLINK: status %d\n", pdu->u.ret_unlink.status); break; default: /* NOT REACHED */ pr_err("unknown command\n"); break; } } EXPORT_SYMBOL_GPL(usbip_dump_header); /* Receive data over TCP/IP. */ int usbip_recv(struct socket *sock, void *buf, int size) { int result; struct kvec iov = {.iov_base = buf, .iov_len = size}; struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; if (!sock || !buf || !size) return -EINVAL; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size); usbip_dbg_xmit("enter\n"); do { sock->sk->sk_allocation = GFP_NOIO; sock->sk->sk_use_task_frag = false; result = sock_recvmsg(sock, &msg, MSG_WAITALL); if (result <= 0) goto err; total += result; } while (msg_data_left(&msg)); if (usbip_dbg_flag_xmit) { pr_debug("receiving....\n"); usbip_dump_buffer(buf, size); pr_debug("received, osize %d ret %d size %zd total %d\n", size, result, msg_data_left(&msg), total); } return total; err: return result; } EXPORT_SYMBOL_GPL(usbip_recv); /* there may be more cases to tweak the flags. */ static unsigned int tweak_transfer_flags(unsigned int flags) { flags &= ~URB_NO_TRANSFER_DMA_MAP; return flags; } /* * USBIP driver packs URB transfer flags in PDUs that are exchanged * between Server (usbip_host) and Client (vhci_hcd). URB_* flags * are internal to kernel and could change. Where as USBIP URB flags * exchanged in PDUs are USBIP user API must not change. * * USBIP_URB* flags are exported as explicit API and client and server * do mapping from kernel flags to USBIP_URB*. Details as follows: * * Client tx path (USBIP_CMD_SUBMIT): * - Maps URB_* to USBIP_URB_* when it sends USBIP_CMD_SUBMIT packet. * * Server rx path (USBIP_CMD_SUBMIT): * - Maps USBIP_URB_* to URB_* when it receives USBIP_CMD_SUBMIT packet. * * Flags aren't included in USBIP_CMD_UNLINK and USBIP_RET_SUBMIT packets * and no special handling is needed for them in the following cases: * - Server rx path (USBIP_CMD_UNLINK) * - Client rx path & Server tx path (USBIP_RET_SUBMIT) * * Code paths: * usbip_pack_pdu() is the common routine that handles packing pdu from * urb and unpack pdu to an urb. * * usbip_pack_cmd_submit() and usbip_pack_ret_submit() handle * USBIP_CMD_SUBMIT and USBIP_RET_SUBMIT respectively. * * usbip_map_urb_to_usbip() and usbip_map_usbip_to_urb() are used * by usbip_pack_cmd_submit() and usbip_pack_ret_submit() to map * flags. 
*/ struct urb_to_usbip_flags { u32 urb_flag; u32 usbip_flag; }; #define NUM_USBIP_FLAGS 17 static const struct urb_to_usbip_flags flag_map[NUM_USBIP_FLAGS] = { {URB_SHORT_NOT_OK, USBIP_URB_SHORT_NOT_OK}, {URB_ISO_ASAP, USBIP_URB_ISO_ASAP}, {URB_NO_TRANSFER_DMA_MAP, USBIP_URB_NO_TRANSFER_DMA_MAP}, {URB_ZERO_PACKET, USBIP_URB_ZERO_PACKET}, {URB_NO_INTERRUPT, USBIP_URB_NO_INTERRUPT}, {URB_FREE_BUFFER, USBIP_URB_FREE_BUFFER}, {URB_DIR_IN, USBIP_URB_DIR_IN}, {URB_DIR_OUT, USBIP_URB_DIR_OUT}, {URB_DIR_MASK, USBIP_URB_DIR_MASK}, {URB_DMA_MAP_SINGLE, USBIP_URB_DMA_MAP_SINGLE}, {URB_DMA_MAP_PAGE, USBIP_URB_DMA_MAP_PAGE}, {URB_DMA_MAP_SG, USBIP_URB_DMA_MAP_SG}, {URB_MAP_LOCAL, USBIP_URB_MAP_LOCAL}, {URB_SETUP_MAP_SINGLE, USBIP_URB_SETUP_MAP_SINGLE}, {URB_SETUP_MAP_LOCAL, USBIP_URB_SETUP_MAP_LOCAL}, {URB_DMA_SG_COMBINED, USBIP_URB_DMA_SG_COMBINED}, {URB_ALIGNED_TEMP_BUFFER, USBIP_URB_ALIGNED_TEMP_BUFFER}, }; static unsigned int urb_to_usbip(unsigned int flags) { unsigned int map_flags = 0; int loop; for (loop = 0; loop < NUM_USBIP_FLAGS; loop++) { if (flags & flag_map[loop].urb_flag) map_flags |= flag_map[loop].usbip_flag; } return map_flags; } static unsigned int usbip_to_urb(unsigned int flags) { unsigned int map_flags = 0; int loop; for (loop = 0; loop < NUM_USBIP_FLAGS; loop++) { if (flags & flag_map[loop].usbip_flag) map_flags |= flag_map[loop].urb_flag; } return map_flags; } static void usbip_pack_cmd_submit(struct usbip_header *pdu, struct urb *urb, int pack) { struct usbip_header_cmd_submit *spdu = &pdu->u.cmd_submit; /* * Some members are not still implemented in usbip. I hope this issue * will be discussed when usbip is ported to other operating systems. */ if (pack) { /* map after tweaking the urb flags */ spdu->transfer_flags = urb_to_usbip(tweak_transfer_flags(urb->transfer_flags)); spdu->transfer_buffer_length = urb->transfer_buffer_length; spdu->start_frame = urb->start_frame; spdu->number_of_packets = urb->number_of_packets; spdu->interval = urb->interval; } else { urb->transfer_flags = usbip_to_urb(spdu->transfer_flags); urb->transfer_buffer_length = spdu->transfer_buffer_length; urb->start_frame = spdu->start_frame; urb->number_of_packets = spdu->number_of_packets; urb->interval = spdu->interval; } } static void usbip_pack_ret_submit(struct usbip_header *pdu, struct urb *urb, int pack) { struct usbip_header_ret_submit *rpdu = &pdu->u.ret_submit; if (pack) { rpdu->status = urb->status; rpdu->actual_length = urb->actual_length; rpdu->start_frame = urb->start_frame; rpdu->number_of_packets = urb->number_of_packets; rpdu->error_count = urb->error_count; } else { urb->status = rpdu->status; urb->actual_length = rpdu->actual_length; urb->start_frame = rpdu->start_frame; urb->number_of_packets = rpdu->number_of_packets; urb->error_count = rpdu->error_count; } } void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd, int pack) { switch (cmd) { case USBIP_CMD_SUBMIT: usbip_pack_cmd_submit(pdu, urb, pack); break; case USBIP_RET_SUBMIT: usbip_pack_ret_submit(pdu, urb, pack); break; default: /* NOT REACHED */ pr_err("unknown command\n"); break; } } EXPORT_SYMBOL_GPL(usbip_pack_pdu); static void correct_endian_basic(struct usbip_header_basic *base, int send) { if (send) { base->command = cpu_to_be32(base->command); base->seqnum = cpu_to_be32(base->seqnum); base->devid = cpu_to_be32(base->devid); base->direction = cpu_to_be32(base->direction); base->ep = cpu_to_be32(base->ep); } else { base->command = be32_to_cpu(base->command); base->seqnum = be32_to_cpu(base->seqnum); 
base->devid = be32_to_cpu(base->devid); base->direction = be32_to_cpu(base->direction); base->ep = be32_to_cpu(base->ep); } } static void correct_endian_cmd_submit(struct usbip_header_cmd_submit *pdu, int send) { if (send) { pdu->transfer_flags = cpu_to_be32(pdu->transfer_flags); cpu_to_be32s(&pdu->transfer_buffer_length); cpu_to_be32s(&pdu->start_frame); cpu_to_be32s(&pdu->number_of_packets); cpu_to_be32s(&pdu->interval); } else { pdu->transfer_flags = be32_to_cpu(pdu->transfer_flags); be32_to_cpus(&pdu->transfer_buffer_length); be32_to_cpus(&pdu->start_frame); be32_to_cpus(&pdu->number_of_packets); be32_to_cpus(&pdu->interval); } } static void correct_endian_ret_submit(struct usbip_header_ret_submit *pdu, int send) { if (send) { cpu_to_be32s(&pdu->status); cpu_to_be32s(&pdu->actual_length); cpu_to_be32s(&pdu->start_frame); cpu_to_be32s(&pdu->number_of_packets); cpu_to_be32s(&pdu->error_count); } else { be32_to_cpus(&pdu->status); be32_to_cpus(&pdu->actual_length); be32_to_cpus(&pdu->start_frame); be32_to_cpus(&pdu->number_of_packets); be32_to_cpus(&pdu->error_count); } } static void correct_endian_cmd_unlink(struct usbip_header_cmd_unlink *pdu, int send) { if (send) pdu->seqnum = cpu_to_be32(pdu->seqnum); else pdu->seqnum = be32_to_cpu(pdu->seqnum); } static void correct_endian_ret_unlink(struct usbip_header_ret_unlink *pdu, int send) { if (send) cpu_to_be32s(&pdu->status); else be32_to_cpus(&pdu->status); } void usbip_header_correct_endian(struct usbip_header *pdu, int send) { __u32 cmd = 0; if (send) cmd = pdu->base.command; correct_endian_basic(&pdu->base, send); if (!send) cmd = pdu->base.command; switch (cmd) { case USBIP_CMD_SUBMIT: correct_endian_cmd_submit(&pdu->u.cmd_submit, send); break; case USBIP_RET_SUBMIT: correct_endian_ret_submit(&pdu->u.ret_submit, send); break; case USBIP_CMD_UNLINK: correct_endian_cmd_unlink(&pdu->u.cmd_unlink, send); break; case USBIP_RET_UNLINK: correct_endian_ret_unlink(&pdu->u.ret_unlink, send); break; default: /* NOT REACHED */ pr_err("unknown command\n"); break; } } EXPORT_SYMBOL_GPL(usbip_header_correct_endian); static void usbip_iso_packet_correct_endian( struct usbip_iso_packet_descriptor *iso, int send) { /* does not need all members. but copy all simply. 
*/ if (send) { iso->offset = cpu_to_be32(iso->offset); iso->length = cpu_to_be32(iso->length); iso->status = cpu_to_be32(iso->status); iso->actual_length = cpu_to_be32(iso->actual_length); } else { iso->offset = be32_to_cpu(iso->offset); iso->length = be32_to_cpu(iso->length); iso->status = be32_to_cpu(iso->status); iso->actual_length = be32_to_cpu(iso->actual_length); } } static void usbip_pack_iso(struct usbip_iso_packet_descriptor *iso, struct usb_iso_packet_descriptor *uiso, int pack) { if (pack) { iso->offset = uiso->offset; iso->length = uiso->length; iso->status = uiso->status; iso->actual_length = uiso->actual_length; } else { uiso->offset = iso->offset; uiso->length = iso->length; uiso->status = iso->status; uiso->actual_length = iso->actual_length; } } /* must free buffer */ struct usbip_iso_packet_descriptor* usbip_alloc_iso_desc_pdu(struct urb *urb, ssize_t *bufflen) { struct usbip_iso_packet_descriptor *iso; int np = urb->number_of_packets; ssize_t size = np * sizeof(*iso); int i; iso = kzalloc(size, GFP_KERNEL); if (!iso) return NULL; for (i = 0; i < np; i++) { usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 1); usbip_iso_packet_correct_endian(&iso[i], 1); } *bufflen = size; return iso; } EXPORT_SYMBOL_GPL(usbip_alloc_iso_desc_pdu); /* some members of urb must be substituted before. */ int usbip_recv_iso(struct usbip_device *ud, struct urb *urb) { void *buff; struct usbip_iso_packet_descriptor *iso; int np = urb->number_of_packets; int size = np * sizeof(*iso); int i; int ret; int total_length = 0; if (!usb_pipeisoc(urb->pipe)) return 0; /* my Bluetooth dongle gets ISO URBs which are np = 0 */ if (np == 0) return 0; buff = kzalloc(size, GFP_KERNEL); if (!buff) return -ENOMEM; ret = usbip_recv(ud->tcp_socket, buff, size); if (ret != size) { dev_err(&urb->dev->dev, "recv iso_frame_descriptor, %d\n", ret); kfree(buff); if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); else usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return -EPIPE; } iso = (struct usbip_iso_packet_descriptor *) buff; for (i = 0; i < np; i++) { usbip_iso_packet_correct_endian(&iso[i], 0); usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 0); total_length += urb->iso_frame_desc[i].actual_length; } kfree(buff); if (total_length != urb->actual_length) { dev_err(&urb->dev->dev, "total length of iso packets %d not equal to actual length of buffer %d\n", total_length, urb->actual_length); if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); else usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return -EPIPE; } return ret; } EXPORT_SYMBOL_GPL(usbip_recv_iso); /* * This function restores the padding which was removed to optimize * bandwidth during transfer over TCP/IP. * * The buffer and iso packets must already be stored in the urb and be in * proper endianness before calling this function. */ void usbip_pad_iso(struct usbip_device *ud, struct urb *urb) { int np = urb->number_of_packets; int i; int actualoffset = urb->actual_length; if (!usb_pipeisoc(urb->pipe)) return; /* if no packets or length of data is 0, then nothing to unpack */ if (np == 0 || urb->actual_length == 0) return; /* * if actual_length is transfer_buffer_length then no padding is * present.
*/ if (urb->actual_length == urb->transfer_buffer_length) return; /* * loop over all packets from last to first (to prevent overwriting * memory when padding) and move them into the proper place */ for (i = np-1; i > 0; i--) { actualoffset -= urb->iso_frame_desc[i].actual_length; memmove(urb->transfer_buffer + urb->iso_frame_desc[i].offset, urb->transfer_buffer + actualoffset, urb->iso_frame_desc[i].actual_length); } } EXPORT_SYMBOL_GPL(usbip_pad_iso); /* some members of urb must be substituted before. */ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) { struct scatterlist *sg; int ret = 0; int recv; int size; int copy; int i; if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) { /* the direction of urb must be OUT. */ if (usb_pipein(urb->pipe)) return 0; size = urb->transfer_buffer_length; } else { /* the direction of urb must be IN. */ if (usb_pipeout(urb->pipe)) return 0; size = urb->actual_length; } /* no need to recv xbuff */ if (!(size > 0)) return 0; if (size > urb->transfer_buffer_length) /* should not happen, probably malicious packet */ goto error; if (urb->num_sgs) { copy = size; for_each_sg(urb->sg, sg, urb->num_sgs, i) { int recv_size; if (copy < sg->length) recv_size = copy; else recv_size = sg->length; recv = usbip_recv(ud->tcp_socket, sg_virt(sg), recv_size); if (recv != recv_size) goto error; copy -= recv; ret += recv; if (!copy) break; } if (ret != size) goto error; } else { ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); if (ret != size) goto error; } return ret; error: dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); else usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return -EPIPE; } EXPORT_SYMBOL_GPL(usbip_recv_xbuff); static int __init usbip_core_init(void) { return usbip_init_eh(); } static void __exit usbip_core_exit(void) { usbip_finish_eh(); return; } module_init(usbip_core_init); module_exit(usbip_core_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
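The transfer-flags comment above (and the flag_map table that implements it) describes a table-driven translation between the kernel's URB_* flags and the on-wire USBIP_URB_* flags. As a rough standalone illustration of that technique, here is a minimal sketch of the same table-driven mapping; the DEMO_* names and values are invented for the example and are not the real kernel or UAPI constants.

#include <stdio.h>

/* Hypothetical flag values, standing in for URB_* and USBIP_URB_*. */
#define DEMO_URB_SHORT_NOT_OK   0x0001u
#define DEMO_URB_ZERO_PACKET    0x0040u
#define DEMO_WIRE_SHORT_NOT_OK  0x0001u
#define DEMO_WIRE_ZERO_PACKET   0x0040u

struct demo_flag_pair {
	unsigned int kernel_flag;
	unsigned int wire_flag;
};

/* One table drives both directions, as flag_map[] does above. */
static const struct demo_flag_pair demo_map[] = {
	{ DEMO_URB_SHORT_NOT_OK, DEMO_WIRE_SHORT_NOT_OK },
	{ DEMO_URB_ZERO_PACKET,  DEMO_WIRE_ZERO_PACKET },
};

/* Translate kernel-side flags to wire flags, one table entry at a time. */
static unsigned int demo_kernel_to_wire(unsigned int flags)
{
	unsigned int out = 0;
	size_t i;

	for (i = 0; i < sizeof(demo_map) / sizeof(demo_map[0]); i++)
		if (flags & demo_map[i].kernel_flag)
			out |= demo_map[i].wire_flag;
	return out;
}

int main(void)
{
	unsigned int flags = DEMO_URB_SHORT_NOT_OK | DEMO_URB_ZERO_PACKET;

	printf("wire flags: 0x%x\n", demo_kernel_to_wire(flags));
	return 0;
}

Keeping a single pair table is what keeps the two directions in sync: adding a flag to the protocol means adding one table entry rather than editing two hand-written switch statements.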
/* * llc_sap.c - driver routines for SAP component. * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <net/llc.h> #include <net/llc_if.h> #include <net/llc_conn.h> #include <net/llc_pdu.h> #include <net/llc_sap.h> #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_s_st.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/llc.h> #include <linux/slab.h> static int llc_mac_header_len(unsigned short devtype) { switch (devtype) { case ARPHRD_ETHER: case ARPHRD_LOOPBACK: return sizeof(struct ethhdr); } return 0; } /** * llc_alloc_frame - allocates sk_buff for frame * @sk: socket to allocate frame to * @dev: network device this skb will be sent over * @type: pdu type to allocate * @data_size: data size to allocate * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, u8 type, u32 data_size) { int hlen = type == LLC_PDU_TYPE_U ?
3 : 4; struct sk_buff *skb; hlen += llc_mac_header_len(dev->type); skb = alloc_skb(hlen + data_size, GFP_ATOMIC); if (skb) { skb_reset_mac_header(skb); skb_reserve(skb, hlen); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->protocol = htons(ETH_P_802_2); skb->dev = dev; if (sk != NULL) skb_set_owner_w(skb, sk); } return skb; } void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; /* save primitive for use by the user. */ addr = llc_ui_skb_cb(skb); memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; addr->sllc_ua = prim == LLC_DATAUNIT_PRIM; llc_pdu_decode_sa(skb, addr->sllc_mac); llc_pdu_decode_ssap(skb, &addr->sllc_sap); } /** * llc_sap_rtn_pdu - Informs upper layer on rx of an UI, XID or TEST pdu. * @sap: pointer to SAP * @skb: received pdu */ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); switch (LLC_U_PDU_RSP(pdu)) { case LLC_1_PDU_CMD_TEST: ev->prim = LLC_TEST_PRIM; break; case LLC_1_PDU_CMD_XID: ev->prim = LLC_XID_PRIM; break; case LLC_1_PDU_CMD_UI: ev->prim = LLC_DATAUNIT_PRIM; break; } ev->ind_cfm_flag = LLC_IND; } /** * llc_find_sap_trans - finds transition for event * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event. * Returns the pointer to found transition on success or %NULL for * failure. */ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, struct sk_buff *skb) { int i = 0; struct llc_sap_state_trans *rc = NULL; struct llc_sap_state_trans **next_trans; struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1]; /* * Search thru events for this state until list exhausted or until * its obvious the event is not valid for the current state */ for (next_trans = curr_state->transitions; next_trans[i]->ev; i++) if (!next_trans[i]->ev(sap, skb)) { rc = next_trans[i]; /* got event match; return it */ break; } return rc; } /** * llc_exec_sap_trans_actions - execute actions related to event * @sap: pointer to SAP * @trans: pointer to transition that it's actions must be performed * @skb: happened event. * * This function executes actions that is related to happened event. * Returns 0 for success and 1 for failure of at least one action. */ static int llc_exec_sap_trans_actions(struct llc_sap *sap, struct llc_sap_state_trans *trans, struct sk_buff *skb) { int rc = 0; const llc_sap_action_t *next_action = trans->ev_actions; for (; next_action && *next_action; next_action++) if ((*next_action)(sap, skb)) rc = 1; return rc; } /** * llc_sap_next_state - finds transition, execs actions & change SAP state * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event, then * executes related actions and finally changes state of SAP. It returns * 0 on success and 1 for failure. 
*/ static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb) { int rc = 1; struct llc_sap_state_trans *trans; if (sap->state > LLC_NR_SAP_STATES) goto out; trans = llc_find_sap_trans(sap, skb); if (!trans) goto out; /* * Got the state to which we next transition; perform the actions * associated with this transition before actually transitioning to the * next state */ rc = llc_exec_sap_trans_actions(sap, trans, skb); if (rc) goto out; /* * Transition SAP to next state if all actions execute successfully */ sap->state = trans->next_state; out: return rc; } /** * llc_sap_state_process - sends event to SAP state machine * @sap: sap to use * @skb: pointer to occurred event * * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. * * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. */ if (sock_queue_rcv_skb(skb->sk, skb) == 0) return; } kfree_skb(skb); } /** * llc_build_and_send_test_pkt - TEST interface for upper layers. * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a TEST pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_TEST_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_build_and_send_xid_pkt - XID interface for upper layers * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a XID pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_XID_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_sap_rcv - sends received pdus to the sap state machine * @sap: current sap component structure. * @skb: received frame. * @sk: socket to associate to frame * * Sends received pdus to the sap state machine. 
*/ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; skb_orphan(skb); sock_hold(sk); skb->sk = sk; skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } static inline bool llc_dgram_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sock *sk, const struct net *net) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && net_eq(sock_net(sk), net) && llc->laddr.lsap == laddr->lsap && ether_addr_equal(llc->laddr.mac, laddr->mac); } /** * llc_lookup_dgram - Finds dgram socket for the local sap/mac * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @net: netns to look up a socket in * * Search socket list of the SAP and finds connection using the local * mac, and local sap. Returns pointer for socket found, %NULL otherwise. */ static struct sock *llc_lookup_dgram(struct llc_sap *sap, const struct llc_addr *laddr, const struct net *net) { struct sock *rc; struct hlist_nulls_node *node; int slot = llc_sk_laddr_hashfn(sap, laddr); struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock_bh(); again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc, net)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc, net))) { sock_put(rc); continue; } goto found; } } rc = NULL; /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (unlikely(get_nulls_value(node) != slot)) goto again; found: rcu_read_unlock_bh(); return rc; } static inline bool llc_mcast_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sk_buff *skb, const struct sock *sk) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && llc->laddr.lsap == laddr->lsap && llc->dev == skb->dev; } static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb, struct sock **stack, int count) { struct sk_buff *skb1; int i; for (i = 0; i < count; i++) { skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) { sock_put(stack[i]); continue; } llc_sap_rcv(sap, skb1, stack[i]); sock_put(stack[i]); } } /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @skb: PDU to deliver * * Search socket list of the SAP and finds connections with same sap. * Deliver clone to each. 
*/ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { int i = 0; struct sock *sk; struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; sock_hold(sk); if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); i = 0; } } spin_unlock_bh(&sap->sk_lock); llc_do_mcast(sap, skb, stack, i); } void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr laddr; llc_pdu_decode_da(skb, laddr.mac); llc_pdu_decode_dsap(skb, &laddr.lsap); if (is_multicast_ether_addr(laddr.mac)) { llc_sap_mcast(sap, &laddr, skb); kfree_skb(skb); } else { struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev)); if (sk) { llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); } }
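llc_sap_next_state() above resolves an incoming event against the current state's transition table, executes the transition's action list, and only then moves the SAP to the next state. The following standalone sketch shows the same table-driven state-machine shape with hypothetical demo_* callbacks; as in llc_find_sap_trans(), an event check returning 0 means "this transition matches".

#include <stdio.h>
#include <stddef.h>

/* Event check: returns 0 when the event matches (LLC convention). */
typedef int (*demo_ev_fn)(int event);
/* Action: returns 0 on success. */
typedef int (*demo_action_fn)(int event);

struct demo_trans {
	demo_ev_fn ev;                 /* NULL ev terminates the table */
	int next_state;
	const demo_action_fn *actions; /* NULL-terminated action list */
};

static int ev_is_data(int event) { return event == 1 ? 0 : 1; }
static int act_log(int event) { printf("handling event %d\n", event); return 0; }

static const demo_action_fn data_actions[] = { act_log, NULL };

static const struct demo_trans state0_trans[] = {
	{ ev_is_data, 1, data_actions },
	{ NULL, 0, NULL },
};

/* Find a matching transition, run its actions, then change state. */
static int demo_next_state(int *state, int event)
{
	const struct demo_trans *t;
	const demo_action_fn *a;

	for (t = state0_trans; t->ev; t++) {
		if (t->ev(event))
			continue;              /* event does not match */
		for (a = t->actions; a && *a; a++)
			if ((*a)(event))
				return 1;      /* an action failed */
		*state = t->next_state;        /* only now change state */
		return 0;
	}
	return 1;                              /* no transition found */
}

int main(void)
{
	int state = 0;
	int rc = demo_next_state(&state, 1);

	printf("rc=%d new state=%d\n", rc, state);
	return 0;
}

The real code selects the table per state (llc_sap_state_table[sap->state - 1]); the sketch hard-codes a single state to stay short.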
/* BNEP implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2001-2002 Inventel Systemes Written 2001-2002 by David Libault <david.libault@inventel.fr> Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED.
*/ #include <linux/compat.h> #include <linux/export.h> #include <linux/file.h> #include "bnep.h" static struct bt_sock_list bnep_sk_list = { .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock) }; static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; BT_DBG("sock %p sk %p", sock, sk); if (!sk) return 0; bt_sock_unlink(&bnep_sk_list, sk); sock_orphan(sk); sock_put(sk); return 0; } static int do_bnep_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp) { struct bnep_connlist_req cl; struct bnep_connadd_req ca; struct bnep_conndel_req cd; struct bnep_conninfo ci; struct socket *nsock; __u32 supp_feat = BIT(BNEP_SETUP_RESPONSE); int err; BT_DBG("cmd %x arg %p", cmd, argp); switch (cmd) { case BNEPCONNADD: if (!capable(CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; nsock = sockfd_lookup(ca.sock, &err); if (!nsock) return err; if (nsock->sk->sk_state != BT_CONNECTED) { sockfd_put(nsock); return -EBADFD; } ca.device[sizeof(ca.device)-1] = 0; err = bnep_add_connection(&ca, nsock); if (!err) { if (copy_to_user(argp, &ca, sizeof(ca))) err = -EFAULT; } else sockfd_put(nsock); return err; case BNEPCONNDEL: if (!capable(CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; return bnep_del_connection(&cd); case BNEPGETCONNLIST: if (copy_from_user(&cl, argp, sizeof(cl))) return -EFAULT; if (cl.cnum <= 0) return -EINVAL; err = bnep_get_connlist(&cl); if (!err && copy_to_user(argp, &cl, sizeof(cl))) return -EFAULT; return err; case BNEPGETCONNINFO: if (copy_from_user(&ci, argp, sizeof(ci))) return -EFAULT; err = bnep_get_conninfo(&ci); if (!err && copy_to_user(argp, &ci, sizeof(ci))) return -EFAULT; return err; case BNEPGETSUPPFEAT: if (copy_to_user(argp, &supp_feat, sizeof(supp_feat))) return -EFAULT; return 0; default: return -EINVAL; } return 0; } static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return do_bnep_sock_ioctl(sock, cmd, (void __user *)arg); } #ifdef CONFIG_COMPAT static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); if (cmd == BNEPGETCONNLIST) { struct bnep_connlist_req cl; unsigned __user *p = argp; u32 uci; int err; if (get_user(cl.cnum, p) || get_user(uci, p + 1)) return -EFAULT; cl.ci = compat_ptr(uci); if (cl.cnum <= 0) return -EINVAL; err = bnep_get_connlist(&cl); if (!err && put_user(cl.cnum, p)) err = -EFAULT; return err; } return do_bnep_sock_ioctl(sock, cmd, argp); } #endif static const struct proto_ops bnep_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = bnep_sock_release, .ioctl = bnep_sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = bnep_sock_compat_ioctl, #endif .bind = sock_no_bind, .getname = sock_no_getname, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .mmap = sock_no_mmap }; static struct proto bnep_proto = { .name = "BNEP", .owner = THIS_MODULE, .obj_size = sizeof(struct bt_sock) }; static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sk = bt_sock_alloc(net, sock, &bnep_proto, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; sock->ops = &bnep_sock_ops; sock->state = SS_UNCONNECTED; bt_sock_link(&bnep_sk_list, sk); return 0; } static const 
struct net_proto_family bnep_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = bnep_sock_create }; int __init bnep_sock_init(void) { int err; err = proto_register(&bnep_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops); if (err < 0) { BT_ERR("Can't register BNEP socket"); goto error; } err = bt_procfs_init(&init_net, "bnep", &bnep_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create BNEP proc file"); bt_sock_unregister(BTPROTO_BNEP); goto error; } BT_INFO("BNEP socket layer initialized"); return 0; error: proto_unregister(&bnep_proto); return err; } void __exit bnep_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "bnep"); bt_sock_unregister(BTPROTO_BNEP); proto_unregister(&bnep_proto); }
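do_bnep_sock_ioctl() above follows the common ioctl shape: copy the request struct in from user space, validate it, perform the operation, and copy the possibly-updated struct back out. The sketch below models only that control flow in plain C; demo_copy() is a stand-in for copy_from_user()/copy_to_user(), and struct demo_list_req is a hypothetical layout, not the real BNEP UAPI.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical "count plus result buffer" request, in the spirit of
 * the connection-list ioctl above. */
struct demo_list_req {
	uint32_t cnum;  /* in: buffer capacity, out: entries returned */
	void *ci;       /* caller-supplied buffer for the entries */
};

/* Stand-in for copy_from_user()/copy_to_user(); returns 0 on success.
 * Real kernel code must never memcpy() raw user pointers like this. */
static int demo_copy(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static int demo_get_list(struct demo_list_req *req)
{
	req->cnum = 1;	/* pretend exactly one entry was produced */
	return 0;
}

/* Copy in, validate, act, copy the updated request back out. */
static int demo_ioctl_get_list(void *argp)
{
	struct demo_list_req req;
	int err;

	if (demo_copy(&req, argp, sizeof(req)))
		return -EFAULT;
	if (req.cnum == 0)
		return -EINVAL;
	err = demo_get_list(&req);
	if (!err && demo_copy(argp, &req, sizeof(req)))
		return -EFAULT;
	return err;
}

int main(void)
{
	struct demo_list_req req = { .cnum = 8, .ci = NULL };
	int rc = demo_ioctl_get_list(&req);

	printf("rc=%d cnum=%u\n", rc, req.cnum);
	return 0;
}

The compat_ioctl path in the driver above does one extra step first: it widens the 32-bit user pointer with compat_ptr() before reusing the same copy-in/validate/copy-out flow.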
/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ #ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ #define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ #include "main.h" #include <linux/kref.h> #include <linux/netlink.h> #include <linux/skbuff.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> void batadv_gw_check_client_stop(struct batadv_priv *bat_priv); void batadv_gw_reselect(struct batadv_priv *bat_priv); void batadv_gw_election(struct batadv_priv *bat_priv); struct batadv_orig_node * batadv_gw_get_selected_orig(struct batadv_priv *bat_priv); void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_tvlv_gateway_data *gateway); void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_free(struct batadv_priv *bat_priv); void batadv_gw_node_release(struct kref *ref); struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, u8 *chaddr); struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); /** * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release * it * @gw_node: gateway node to free */ static inline void batadv_gw_node_put(struct batadv_gw_node *gw_node) { if (!gw_node) return; kref_put(&gw_node->refcount, batadv_gw_node_release); } #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
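batadv_gw_node_put() above is the usual kref pattern: a NULL-tolerant put helper drops one reference, and a release callback frees the object when the count reaches zero. Below is a minimal userspace sketch of that pattern under obvious simplifications: a plain int stands in for the atomic refcount inside kref, and all demo_* names are invented.

#include <stdio.h>
#include <stdlib.h>

struct demo_ref {
	int refcount;   /* kref uses an atomic refcount_t instead */
};

struct demo_node {
	struct demo_ref ref;   /* first member, so the cast below is valid */
	int payload;
};

/* Release callback, invoked only when the last reference is dropped. */
static void demo_node_release(struct demo_ref *ref)
{
	struct demo_node *node = (struct demo_node *)ref;

	printf("releasing node with payload %d\n", node->payload);
	free(node);
}

/* NULL-tolerant put helper, mirroring batadv_gw_node_put(). */
static void demo_node_put(struct demo_node *node)
{
	if (!node)
		return;
	if (--node->ref.refcount == 0)
		demo_node_release(&node->ref);
}

int main(void)
{
	struct demo_node *node = malloc(sizeof(*node));

	if (!node)
		return 1;
	node->ref.refcount = 1;
	node->payload = 42;

	demo_node_put(NULL);   /* safe no-op, like the real helper */
	demo_node_put(node);   /* last reference: release runs */
	return 0;
}

The real kref_put() resolves the embedded kref back to its container with container_of(); placing the ref first and casting is only a shortcut for the sketch.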
// SPDX-License-Identifier: GPL-2.0 /* * Helper routines for building identity mapping page tables. This is * included by both the compressed kernel and the regular kernel. */ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page, unsigned long addr, unsigned long end) { addr &= PMD_MASK; for (; addr < end; addr += PMD_SIZE) { pmd_t *pmd = pmd_page + pmd_index(addr); if (pmd_present(*pmd)) continue; set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag)); } } static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, unsigned long addr, unsigned long end) { unsigned long next; for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; if (info->direct_gbpages) { pud_t pudval; if (pud_present(*pud)) continue; addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; } if (pud_present(*pud)) { pmd = pmd_offset(pud, 0); ident_pmd_init(info, pmd, addr, next); continue; } pmd = (pmd_t *)info->alloc_pgt_page(info->context); if (!pmd) return -ENOMEM; ident_pmd_init(info, pmd, addr, next); set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag)); } return 0; } static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, unsigned long addr, unsigned long end) { unsigned long next; int result; for (; addr < end; addr = next) { p4d_t *p4d = p4d_page + p4d_index(addr); pud_t *pud; next = (addr & P4D_MASK) + P4D_SIZE; if (next > end) next = end; if (p4d_present(*p4d)) { pud = pud_offset(p4d, 0); result = ident_pud_init(info, pud, addr, next); if (result) return result; continue; } pud = (pud_t *)info->alloc_pgt_page(info->context); if (!pud) return -ENOMEM; result = ident_pud_init(info, pud, addr, next); if (result) return result; set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); } return 0; } int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, unsigned long pstart, unsigned long pend) { unsigned long addr = pstart + info->offset; unsigned long end = pend + info->offset; unsigned long next; int result; /* Set the default pagetable flags if not supplied */ if (!info->kernpg_flag) info->kernpg_flag = _KERNPG_TABLE; /* Filter out unsupported __PAGE_KERNEL_* bits: */ info->kernpg_flag &= __default_kernel_pte_mask; for (; addr < end; addr = next) { pgd_t *pgd = pgd_page + pgd_index(addr); p4d_t *p4d; next = (addr & PGDIR_MASK) + PGDIR_SIZE; if (next > end) next = end; if (pgd_present(*pgd)) { p4d = p4d_offset(pgd, 0); result = ident_p4d_init(info, p4d, addr, next); if (result) return result; continue; } p4d = (p4d_t *)info->alloc_pgt_page(info->context); if (!p4d) return -ENOMEM; result = ident_p4d_init(info, p4d, addr, next); if (result) return result; if (pgtable_l5_enabled()) { set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); } else { /* * With p4d folded, pgd is equal to p4d. * The pgd entry has to point to the pud page table in this case.
*/ pud_t *pud = pud_offset(p4d, 0); set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); } } return 0; }
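kernel_ident_mapping_init() above walks PGD -> P4D -> PUD -> PMD, indexing each table with a different 9-bit slice of the virtual address and clamping each step's "next" boundary to the end of the region. The standalone sketch below only reproduces that address arithmetic for the standard x86-64 4-level layout (shifts 39/30/21, 512 entries per table, p4d folded into pgd as the comment above notes); it does not touch real page tables.

#include <stdint.h>
#include <stdio.h>

#define DEMO_PMD_SHIFT    21
#define DEMO_PUD_SHIFT    30
#define DEMO_PGDIR_SHIFT  39
#define DEMO_PTRS_PER_TBL 512

/* Index into one level's table: shift, then mask to 9 bits. */
static unsigned int demo_index(uint64_t addr, unsigned int shift)
{
	return (addr >> shift) & (DEMO_PTRS_PER_TBL - 1);
}

/* The "next = (addr & MASK) + SIZE" step: next boundary at this level. */
static uint64_t demo_next_boundary(uint64_t addr, unsigned int shift)
{
	uint64_t size = 1ULL << shift;

	return (addr & ~(size - 1)) + size;
}

int main(void)
{
	uint64_t addr = 0x123456789000ULL;

	printf("pgd %u pud %u pmd %u\n",
	       demo_index(addr, DEMO_PGDIR_SHIFT),
	       demo_index(addr, DEMO_PUD_SHIFT),
	       demo_index(addr, DEMO_PMD_SHIFT));
	printf("next pud boundary 0x%llx\n",
	       (unsigned long long)demo_next_boundary(addr, DEMO_PUD_SHIFT));
	return 0;
}

In the real walk the computed boundary is additionally clamped with "if (next > end) next = end;", so the last table at each level is only partially populated.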
// SPDX-License-Identifier: GPL-2.0+ /* * NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * * Written by Koji Sato. */ #include <linux/fs.h> #include <linux/string.h> #include <linux/errno.h> #include "nilfs.h" #include "bmap.h" #include "btree.h" #include "direct.h" #include "btnode.h" #include "mdt.h" #include "dat.h" #include "alloc.h" struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) { struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info; return nilfs->ns_dat; } static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, const char *fname, int err) { struct inode *inode = bmap->b_inode; if (err == -EINVAL) { __nilfs_error(inode->i_sb, fname, "broken bmap (inode number=%lu)", inode->i_ino); err = -EIO; } return err; } /** * nilfs_bmap_lookup_at_level - find a data block or node block * @bmap: bmap * @key: key * @level: level * @ptrp: place to store the value associated to @key * * Description: nilfs_bmap_lookup_at_level() finds a record whose key * matches @key in the block at @level of the bmap.
* * Return Value: On success, 0 is returned and the record associated with @key * is stored in the place pointed by @ptrp. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - A record associated with @key does not exist. */ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) { sector_t blocknr; int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); if (ret < 0) goto out; if (NILFS_BMAP_USE_VBN(bmap)) { ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, &blocknr); if (!ret) *ptrp = blocknr; else if (ret == -ENOENT) { /* * If there was no valid entry in DAT for the block * address obtained by b_ops->bop_lookup, then pass * internal code -EINVAL to nilfs_bmap_convert_error * to treat it as metadata corruption. */ ret = -EINVAL; } } out: up_read(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, unsigned int maxblocks) { int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); up_read(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) { __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; __u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1]; int ret, n; if (bmap->b_ops->bop_check_insert != NULL) { ret = bmap->b_ops->bop_check_insert(bmap, key); if (ret > 0) { n = bmap->b_ops->bop_gather_data( bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1); if (n < 0) return n; ret = nilfs_btree_convert_and_insert( bmap, key, ptr, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags |= NILFS_BMAP_LARGE; return ret; } else if (ret < 0) return ret; } return bmap->b_ops->bop_insert(bmap, key, ptr); } /** * nilfs_bmap_insert - insert a new key-record pair into a bmap * @bmap: bmap * @key: key * @rec: record * * Description: nilfs_bmap_insert() inserts the new key-record pair specified * by @key and @rec into @bmap. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-EEXIST - A record associated with @key already exist. */ int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_do_insert(bmap, key, rec); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) { __u64 keys[NILFS_BMAP_LARGE_LOW + 1]; __u64 ptrs[NILFS_BMAP_LARGE_LOW + 1]; int ret, n; if (bmap->b_ops->bop_check_delete != NULL) { ret = bmap->b_ops->bop_check_delete(bmap, key); if (ret > 0) { n = bmap->b_ops->bop_gather_data( bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1); if (n < 0) return n; ret = nilfs_direct_delete_and_convert( bmap, key, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; return ret; } else if (ret < 0) return ret; } return bmap->b_ops->bop_delete(bmap, key); } /** * nilfs_bmap_seek_key - seek a valid entry and return its key * @bmap: bmap struct * @start: start key number * @keyp: place to store valid key * * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap * starting from @start, and stores it to @keyp if found. * * Return Value: On success, 0 is returned. 
On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - No valid entry was found */ int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp) { int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_seek_key(bmap, start, keyp); up_read(&bmap->b_sem); if (ret < 0) ret = nilfs_bmap_convert_error(bmap, __func__, ret); return ret; } int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp) { int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_last_key(bmap, keyp); up_read(&bmap->b_sem); if (ret < 0) ret = nilfs_bmap_convert_error(bmap, __func__, ret); return ret; } /** * nilfs_bmap_delete - delete a key-record pair from a bmap * @bmap: bmap * @key: key * * Description: nilfs_bmap_delete() deletes the key-record pair specified by * @key from @bmap. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - A record associated with @key does not exist. */ int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_do_delete(bmap, key); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key) { __u64 lastkey; int ret; ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (ret < 0) { if (ret == -ENOENT) ret = 0; return ret; } while (key <= lastkey) { ret = nilfs_bmap_do_delete(bmap, lastkey); if (ret < 0) return ret; ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (ret < 0) { if (ret == -ENOENT) ret = 0; return ret; } } return 0; } /** * nilfs_bmap_truncate - truncate a bmap to a specified key * @bmap: bmap * @key: key * * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are * greater than or equal to @key from @bmap. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_do_truncate(bmap, key); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_clear - free resources a bmap holds * @bmap: bmap * * Description: nilfs_bmap_clear() frees resources associated with @bmap. */ void nilfs_bmap_clear(struct nilfs_bmap *bmap) { down_write(&bmap->b_sem); if (bmap->b_ops->bop_clear != NULL) bmap->b_ops->bop_clear(bmap); up_write(&bmap->b_sem); } /** * nilfs_bmap_propagate - propagate dirty state * @bmap: bmap * @bh: buffer head * * Description: nilfs_bmap_propagate() marks the buffers that directly or * indirectly refer to the block specified by @bh dirty. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. 
*/ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { int ret; down_write(&bmap->b_sem); ret = bmap->b_ops->bop_propagate(bmap, bh); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_lookup_dirty_buffers - * @bmap: bmap * @listp: pointer to buffer head list */ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, struct list_head *listp) { if (bmap->b_ops->bop_lookup_dirty_buffers != NULL) bmap->b_ops->bop_lookup_dirty_buffers(bmap, listp); } /** * nilfs_bmap_assign - assign a new block number to a block * @bmap: bmap * @bh: pointer to buffer head * @blocknr: block number * @binfo: block information * * Description: nilfs_bmap_assign() assigns the block number @blocknr to the * buffer specified by @bh. * * Return Value: On success, 0 is returned and the buffer head of a newly * create buffer and the block information associated with the buffer are * stored in the place pointed by @bh and @binfo, respectively. On error, one * of the following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_assign(struct nilfs_bmap *bmap, struct buffer_head **bh, unsigned long blocknr, union nilfs_binfo *binfo) { int ret; down_write(&bmap->b_sem); ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_mark - mark block dirty * @bmap: bmap * @key: key * @level: level * * Description: nilfs_bmap_mark() marks the block specified by @key and @level * as dirty. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) { int ret; if (bmap->b_ops->bop_mark == NULL) return 0; down_write(&bmap->b_sem); ret = bmap->b_ops->bop_mark(bmap, key, level); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state * @bmap: bmap * * Description: nilfs_test_and_clear() is the atomic operation to test and * clear the dirty state of @bmap. * * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. 
*/ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_dirty(bmap); nilfs_bmap_clear_dirty(bmap); up_write(&bmap->b_sem); return ret; } /* * Internal use only */ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { struct buffer_head *pbh; __u64 key; key = page_index(bh->b_page) << (PAGE_SHIFT - bmap->b_inode->i_blkbits); for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) key++; return key; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) { __s64 diff; diff = key - bmap->b_last_allocated_key; if ((nilfs_bmap_keydiff_abs(diff) < NILFS_INODE_BMAP_SIZE) && (bmap->b_last_allocated_ptr != NILFS_BMAP_INVALID_PTR) && (bmap->b_last_allocated_ptr + diff > 0)) return bmap->b_last_allocated_ptr + diff; else return NILFS_BMAP_INVALID_PTR; } #define NILFS_BMAP_GROUP_DIV 8 __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) { struct inode *dat = nilfs_bmap_get_dat(bmap); unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat); unsigned long group = bmap->b_inode->i_ino / entries_per_group; return group * entries_per_group + (bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) * (entries_per_group / NILFS_BMAP_GROUP_DIV); } static struct lock_class_key nilfs_bmap_dat_lock_key; static struct lock_class_key nilfs_bmap_mdt_lock_key; /** * nilfs_bmap_read - read a bmap from an inode * @bmap: bmap * @raw_inode: on-disk inode * * Description: nilfs_bmap_read() initializes the bmap @bmap. * * Return Value: On success, 0 is returned. On error, the following negative * error code is returned. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { if (raw_inode == NULL) memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE); else memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_state = 0; bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; switch (bmap->b_inode->i_ino) { case NILFS_DAT_INO: bmap->b_ptr_type = NILFS_BMAP_PTR_P; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: bmap->b_ptr_type = NILFS_BMAP_PTR_VS; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); break; case NILFS_IFILE_INO: lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); fallthrough; default: bmap->b_ptr_type = NILFS_BMAP_PTR_VM; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; } return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? nilfs_btree_init(bmap) : nilfs_direct_init(bmap); } /** * nilfs_bmap_write - write back a bmap to an inode * @bmap: bmap * @raw_inode: on-disk inode * * Description: nilfs_bmap_write() stores @bmap in @raw_inode. 
*/ void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { memcpy(raw_inode->i_bmap, bmap->b_u.u_data, NILFS_INODE_BMAP_SIZE * sizeof(__le64)); if (bmap->b_inode->i_ino == NILFS_DAT_INO) bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; } void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) { memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; bmap->b_ptr_type = NILFS_BMAP_PTR_U; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; bmap->b_state = 0; nilfs_btree_init_gc(bmap); } void nilfs_bmap_save(const struct nilfs_bmap *bmap, struct nilfs_bmap_store *store) { memcpy(store->data, bmap->b_u.u_data, sizeof(store->data)); store->last_allocated_key = bmap->b_last_allocated_key; store->last_allocated_ptr = bmap->b_last_allocated_ptr; store->state = bmap->b_state; } void nilfs_bmap_restore(struct nilfs_bmap *bmap, const struct nilfs_bmap_store *store) { memcpy(bmap->b_u.u_data, store->data, sizeof(store->data)); bmap->b_last_allocated_key = store->last_allocated_key; bmap->b_last_allocated_ptr = store->last_allocated_ptr; bmap->b_state = store->state; }
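Almost every exported function in this file follows the same wrapper shape: take b_sem, delegate to the bop_* operation vector, drop the lock, and pass the result through nilfs_bmap_convert_error(). The standalone sketch below mirrors that shape with a pthread read-write lock standing in for the kernel rw_semaphore; the demo_* types are hypothetical and the error handling is reduced to the single -EINVAL to -EIO conversion seen above. Build with -pthread.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct demo_bmap;

/* Hypothetical operation vector, in the spirit of the bop_* callbacks. */
struct demo_bmap_ops {
	int (*lookup)(struct demo_bmap *bmap, unsigned long key,
		      unsigned long *ptrp);
};

struct demo_bmap {
	pthread_rwlock_t sem;            /* stands in for b_sem */
	const struct demo_bmap_ops *ops;
};

/* An internal -EINVAL means a broken mapping; report it as -EIO. */
static int demo_convert_error(int err)
{
	return err == -EINVAL ? -EIO : err;
}

/* Public entry point: lock, delegate to the ops vector, convert errors. */
static int demo_bmap_lookup(struct demo_bmap *bmap, unsigned long key,
			    unsigned long *ptrp)
{
	int ret;

	pthread_rwlock_rdlock(&bmap->sem);
	ret = bmap->ops->lookup(bmap, key, ptrp);
	pthread_rwlock_unlock(&bmap->sem);
	return demo_convert_error(ret);
}

static int demo_lookup_impl(struct demo_bmap *bmap, unsigned long key,
			    unsigned long *ptrp)
{
	(void)bmap;
	*ptrp = key + 1000;	/* pretend translation */
	return 0;
}

static const struct demo_bmap_ops demo_ops = { .lookup = demo_lookup_impl };

int main(void)
{
	static struct demo_bmap bmap = {
		.sem = PTHREAD_RWLOCK_INITIALIZER,
		.ops = &demo_ops,
	};
	unsigned long ptr = 0;
	int ret = demo_bmap_lookup(&bmap, 42, &ptr);

	printf("ret=%d ptr=%lu\n", ret, ptr);
	return 0;
}

Lookups take the lock shared (rdlock here, down_read() in the file) while insert, delete, and truncate take it exclusive, which is why the wrappers, not the bop_* implementations, own the locking.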
1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 /* * Copyright (C) 2014 Red Hat * Copyright (C) 2014 Intel Corp. * Copyright (C) 2018 Intel Corp. * Copyright (c) 2020, The Linux Foundation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Rob Clark <robdclark@gmail.com> * Daniel Vetter <daniel.vetter@ffwll.ch> */ #include <drm/drm_atomic_uapi.h> #include <drm/drm_atomic.h> #include <drm/drm_framebuffer.h> #include <drm/drm_print.h> #include <drm/drm_drv.h> #include <drm/drm_writeback.h> #include <drm/drm_vblank.h> #include <linux/dma-fence.h> #include <linux/uaccess.h> #include <linux/sync_file.h> #include <linux/file.h> #include "drm_crtc_internal.h" /** * DOC: overview * * This file contains the marshalling and demarshalling glue for the atomic UAPI * in all its forms: The monster ATOMIC IOCTL itself, code for GET_PROPERTY and * SET_PROPERTY IOCTLs. Plus interface functions for compatibility helpers and * drivers which have special needs to construct their own atomic updates, e.g. * for load detect or similar. */ /** * drm_atomic_set_mode_for_crtc - set mode for CRTC * @state: the CRTC whose incoming state to update * @mode: kernel-internal mode to use for the CRTC, or NULL to disable * * Set a mode (originating from the kernel) on the desired CRTC state and update * the enable property. * * RETURNS: * Zero on success, error code on failure. Cannot return -EDEADLK. */ int drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state, const struct drm_display_mode *mode) { struct drm_crtc *crtc = state->crtc; struct drm_mode_modeinfo umode; /* Early return for no change. 
*/ if (mode && memcmp(&state->mode, mode, sizeof(*mode)) == 0) return 0; drm_property_blob_put(state->mode_blob); state->mode_blob = NULL; if (mode) { struct drm_property_blob *blob; drm_mode_convert_to_umode(&umode, mode); blob = drm_property_create_blob(crtc->dev, sizeof(umode), &umode); if (IS_ERR(blob)) return PTR_ERR(blob); drm_mode_copy(&state->mode, mode); state->mode_blob = blob; state->enable = true; drm_dbg_atomic(crtc->dev, "Set [MODE:%s] for [CRTC:%d:%s] state %p\n", mode->name, crtc->base.id, crtc->name, state); } else { memset(&state->mode, 0, sizeof(state->mode)); state->enable = false; drm_dbg_atomic(crtc->dev, "Set [NOMODE] for [CRTC:%d:%s] state %p\n", crtc->base.id, crtc->name, state); } return 0; } EXPORT_SYMBOL(drm_atomic_set_mode_for_crtc); /** * drm_atomic_set_mode_prop_for_crtc - set mode for CRTC * @state: the CRTC whose incoming state to update * @blob: pointer to blob property to use for mode * * Set a mode (originating from a blob property) on the desired CRTC state. * This function will take a reference on the blob property for the CRTC state, * and release the reference held on the state's existing mode property, if any * was set. * * RETURNS: * Zero on success, error code on failure. Cannot return -EDEADLK. */ int drm_atomic_set_mode_prop_for_crtc(struct drm_crtc_state *state, struct drm_property_blob *blob) { struct drm_crtc *crtc = state->crtc; if (blob == state->mode_blob) return 0; drm_property_blob_put(state->mode_blob); state->mode_blob = NULL; memset(&state->mode, 0, sizeof(state->mode)); if (blob) { int ret; if (blob->length != sizeof(struct drm_mode_modeinfo)) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] bad mode blob length: %zu\n", crtc->base.id, crtc->name, blob->length); return -EINVAL; } ret = drm_mode_convert_umode(crtc->dev, &state->mode, blob->data); if (ret) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] invalid mode (ret=%d, status=%s):\n", crtc->base.id, crtc->name, ret, drm_get_mode_status_name(state->mode.status)); drm_mode_debug_printmodeline(&state->mode); return -EINVAL; } state->mode_blob = drm_property_blob_get(blob); state->enable = true; drm_dbg_atomic(crtc->dev, "Set [MODE:%s] for [CRTC:%d:%s] state %p\n", state->mode.name, crtc->base.id, crtc->name, state); } else { state->enable = false; drm_dbg_atomic(crtc->dev, "Set [NOMODE] for [CRTC:%d:%s] state %p\n", crtc->base.id, crtc->name, state); } return 0; } EXPORT_SYMBOL(drm_atomic_set_mode_prop_for_crtc); /** * drm_atomic_set_crtc_for_plane - set CRTC for plane * @plane_state: the plane whose incoming state to update * @crtc: CRTC to use for the plane * * Changing the assigned CRTC for a plane requires us to grab the lock and state * for the new CRTC, as needed. This function takes care of all these details * besides updating the pointer in the state object itself. * * Returns: * 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK * then the w/w mutex code has detected a deadlock and the entire atomic * sequence must be restarted. All other errors are fatal. 
*/ int drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, struct drm_crtc *crtc) { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; /* Nothing to do for same crtc*/ if (plane_state->crtc == crtc) return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); if (WARN_ON(IS_ERR(crtc_state))) return PTR_ERR(crtc_state); crtc_state->plane_mask &= ~drm_plane_mask(plane); } plane_state->crtc = crtc; if (crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); crtc_state->plane_mask |= drm_plane_mask(plane); } if (crtc) drm_dbg_atomic(plane->dev, "Link [PLANE:%d:%s] state %p to [CRTC:%d:%s]\n", plane->base.id, plane->name, plane_state, crtc->base.id, crtc->name); else drm_dbg_atomic(plane->dev, "Link [PLANE:%d:%s] state %p to [NOCRTC]\n", plane->base.id, plane->name, plane_state); return 0; } EXPORT_SYMBOL(drm_atomic_set_crtc_for_plane); /** * drm_atomic_set_fb_for_plane - set framebuffer for plane * @plane_state: atomic state object for the plane * @fb: fb to use for the plane * * Changing the assigned framebuffer for a plane requires us to grab a reference * to the new fb and drop the reference to the old fb, if there is one. This * function takes care of all these details besides updating the pointer in the * state object itself. */ void drm_atomic_set_fb_for_plane(struct drm_plane_state *plane_state, struct drm_framebuffer *fb) { struct drm_plane *plane = plane_state->plane; if (fb) drm_dbg_atomic(plane->dev, "Set [FB:%d] for [PLANE:%d:%s] state %p\n", fb->base.id, plane->base.id, plane->name, plane_state); else drm_dbg_atomic(plane->dev, "Set [NOFB] for [PLANE:%d:%s] state %p\n", plane->base.id, plane->name, plane_state); drm_framebuffer_assign(&plane_state->fb, fb); } EXPORT_SYMBOL(drm_atomic_set_fb_for_plane); /** * drm_atomic_set_crtc_for_connector - set CRTC for connector * @conn_state: atomic state object for the connector * @crtc: CRTC to use for the connector * * Changing the assigned CRTC for a connector requires us to grab the lock and * state for the new CRTC, as needed. This function takes care of all these * details besides updating the pointer in the state object itself. * * Returns: * 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK * then the w/w mutex code has detected a deadlock and the entire atomic * sequence must be restarted. All other errors are fatal. 
*/ int drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, struct drm_crtc *crtc) { struct drm_connector *connector = conn_state->connector; struct drm_crtc_state *crtc_state; if (conn_state->crtc == crtc) return 0; if (conn_state->crtc) { crtc_state = drm_atomic_get_new_crtc_state(conn_state->state, conn_state->crtc); crtc_state->connector_mask &= ~drm_connector_mask(conn_state->connector); drm_connector_put(conn_state->connector); conn_state->crtc = NULL; } if (crtc) { crtc_state = drm_atomic_get_crtc_state(conn_state->state, crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); crtc_state->connector_mask |= drm_connector_mask(conn_state->connector); drm_connector_get(conn_state->connector); conn_state->crtc = crtc; drm_dbg_atomic(connector->dev, "Link [CONNECTOR:%d:%s] state %p to [CRTC:%d:%s]\n", connector->base.id, connector->name, conn_state, crtc->base.id, crtc->name); } else { drm_dbg_atomic(connector->dev, "Link [CONNECTOR:%d:%s] state %p to [NOCRTC]\n", connector->base.id, connector->name, conn_state); } return 0; } EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector); static void set_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc, s32 __user *fence_ptr) { state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = fence_ptr; } static s32 __user *get_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc) { s32 __user *fence_ptr; fence_ptr = state->crtcs[drm_crtc_index(crtc)].out_fence_ptr; state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = NULL; return fence_ptr; } static int set_out_fence_for_connector(struct drm_atomic_state *state, struct drm_connector *connector, s32 __user *fence_ptr) { unsigned int index = drm_connector_index(connector); if (!fence_ptr) return 0; if (put_user(-1, fence_ptr)) return -EFAULT; state->connectors[index].out_fence_ptr = fence_ptr; return 0; } static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, struct drm_connector *connector) { unsigned int index = drm_connector_index(connector); s32 __user *fence_ptr; fence_ptr = state->connectors[index].out_fence_ptr; state->connectors[index].out_fence_ptr = NULL; return fence_ptr; } static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val) { struct drm_device *dev = crtc->dev; struct drm_mode_config *config = &dev->mode_config; bool replaced = false; int ret; if (property == config->prop_active) state->active = val; else if (property == config->prop_mode_id) { struct drm_property_blob *mode = drm_property_lookup_blob(dev, val); ret = drm_atomic_set_mode_prop_for_crtc(state, mode); drm_property_blob_put(mode); return ret; } else if (property == config->prop_vrr_enabled) { state->vrr_enabled = val; } else if (property == config->degamma_lut_property) { ret = drm_property_replace_blob_from_id(dev, &state->degamma_lut, val, -1, sizeof(struct drm_color_lut), &replaced); state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { ret = drm_property_replace_blob_from_id(dev, &state->ctm, val, sizeof(struct drm_color_ctm), -1, &replaced); state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { ret = drm_property_replace_blob_from_id(dev, &state->gamma_lut, val, -1, sizeof(struct drm_color_lut), &replaced); state->color_mgmt_changed |= replaced; return ret; } else if (property == config->prop_out_fence_ptr) { s32 __user *fence_ptr = u64_to_user_ptr(val); if (!fence_ptr) 
return 0; if (put_user(-1, fence_ptr)) return -EFAULT; set_out_fence_for_crtc(state->state, crtc, fence_ptr); } else if (property == crtc->scaling_filter_property) { state->scaling_filter = val; } else if (crtc->funcs->atomic_set_property) { return crtc->funcs->atomic_set_property(crtc, state, property, val); } else { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] unknown property [PROP:%d:%s]\n", crtc->base.id, crtc->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_crtc_get_property(struct drm_crtc *crtc, const struct drm_crtc_state *state, struct drm_property *property, uint64_t *val) { struct drm_device *dev = crtc->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_active) *val = drm_atomic_crtc_effectively_active(state); else if (property == config->prop_mode_id) *val = (state->mode_blob) ? state->mode_blob->base.id : 0; else if (property == config->prop_vrr_enabled) *val = state->vrr_enabled; else if (property == config->degamma_lut_property) *val = (state->degamma_lut) ? state->degamma_lut->base.id : 0; else if (property == config->ctm_property) *val = (state->ctm) ? state->ctm->base.id : 0; else if (property == config->gamma_lut_property) *val = (state->gamma_lut) ? state->gamma_lut->base.id : 0; else if (property == config->prop_out_fence_ptr) *val = 0; else if (property == crtc->scaling_filter_property) *val = state->scaling_filter; else if (crtc->funcs->atomic_get_property) return crtc->funcs->atomic_get_property(crtc, state, property, val); else { drm_dbg_atomic(dev, "[CRTC:%d:%s] unknown property [PROP:%d:%s]\n", crtc->base.id, crtc->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_plane_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_file *file_priv, struct drm_property *property, uint64_t val) { struct drm_device *dev = plane->dev; struct drm_mode_config *config = &dev->mode_config; bool replaced = false; int ret; if (property == config->prop_fb_id) { struct drm_framebuffer *fb; fb = drm_framebuffer_lookup(dev, file_priv, val); drm_atomic_set_fb_for_plane(state, fb); if (fb) drm_framebuffer_put(fb); } else if (property == config->prop_in_fence_fd) { if (state->fence) return -EINVAL; if (U642I64(val) == -1) return 0; state->fence = sync_file_get_fence(val); if (!state->fence) return -EINVAL; } else if (property == config->prop_crtc_id) { struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); if (val && !crtc) { drm_dbg_atomic(dev, "[PROP:%d:%s] cannot find CRTC with ID %llu\n", property->base.id, property->name, val); return -EACCES; } return drm_atomic_set_crtc_for_plane(state, crtc); } else if (property == config->prop_crtc_x) { state->crtc_x = U642I64(val); } else if (property == config->prop_crtc_y) { state->crtc_y = U642I64(val); } else if (property == config->prop_crtc_w) { state->crtc_w = val; } else if (property == config->prop_crtc_h) { state->crtc_h = val; } else if (property == config->prop_src_x) { state->src_x = val; } else if (property == config->prop_src_y) { state->src_y = val; } else if (property == config->prop_src_w) { state->src_w = val; } else if (property == config->prop_src_h) { state->src_h = val; } else if (property == plane->alpha_property) { state->alpha = val; } else if (property == plane->blend_mode_property) { state->pixel_blend_mode = val; } else if (property == plane->rotation_property) { if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK)) { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] bad rotation 
bitmask: 0x%llx\n", plane->base.id, plane->name, val); return -EINVAL; } state->rotation = val; } else if (property == plane->zpos_property) { state->zpos = val; } else if (property == plane->color_encoding_property) { state->color_encoding = val; } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == config->prop_fb_damage_clips) { ret = drm_property_replace_blob_from_id(dev, &state->fb_damage_clips, val, -1, sizeof(struct drm_rect), &replaced); return ret; } else if (property == plane->scaling_filter_property) { state->scaling_filter = val; } else if (plane->funcs->atomic_set_property) { return plane->funcs->atomic_set_property(plane, state, property, val); } else if (property == plane->hotspot_x_property) { if (plane->type != DRM_PLANE_TYPE_CURSOR) { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", plane->base.id, plane->name, val); return -EINVAL; } state->hotspot_x = val; } else if (property == plane->hotspot_y_property) { if (plane->type != DRM_PLANE_TYPE_CURSOR) { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", plane->base.id, plane->name, val); return -EINVAL; } state->hotspot_y = val; } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", plane->base.id, plane->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_plane_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, uint64_t *val) { struct drm_device *dev = plane->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_fb_id) { *val = (state->fb) ? state->fb->base.id : 0; } else if (property == config->prop_in_fence_fd) { *val = -1; } else if (property == config->prop_crtc_id) { *val = (state->crtc) ? state->crtc->base.id : 0; } else if (property == config->prop_crtc_x) { *val = I642U64(state->crtc_x); } else if (property == config->prop_crtc_y) { *val = I642U64(state->crtc_y); } else if (property == config->prop_crtc_w) { *val = state->crtc_w; } else if (property == config->prop_crtc_h) { *val = state->crtc_h; } else if (property == config->prop_src_x) { *val = state->src_x; } else if (property == config->prop_src_y) { *val = state->src_y; } else if (property == config->prop_src_w) { *val = state->src_w; } else if (property == config->prop_src_h) { *val = state->src_h; } else if (property == plane->alpha_property) { *val = state->alpha; } else if (property == plane->blend_mode_property) { *val = state->pixel_blend_mode; } else if (property == plane->rotation_property) { *val = state->rotation; } else if (property == plane->zpos_property) { *val = state->zpos; } else if (property == plane->color_encoding_property) { *val = state->color_encoding; } else if (property == plane->color_range_property) { *val = state->color_range; } else if (property == config->prop_fb_damage_clips) { *val = (state->fb_damage_clips) ? 
state->fb_damage_clips->base.id : 0; } else if (property == plane->scaling_filter_property) { *val = state->scaling_filter; } else if (plane->funcs->atomic_get_property) { return plane->funcs->atomic_get_property(plane, state, property, val); } else if (property == plane->hotspot_x_property) { *val = state->hotspot_x; } else if (property == plane->hotspot_y_property) { *val = state->hotspot_y; } else { drm_dbg_atomic(dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", plane->base.id, plane->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_set_writeback_fb_for_connector( struct drm_connector_state *conn_state, struct drm_framebuffer *fb) { int ret; struct drm_connector *conn = conn_state->connector; ret = drm_writeback_set_fb(conn_state, fb); if (ret < 0) return ret; if (fb) drm_dbg_atomic(conn->dev, "Set [FB:%d] for connector state %p\n", fb->base.id, conn_state); else drm_dbg_atomic(conn->dev, "Set [NOFB] for connector state %p\n", conn_state); return 0; } static int drm_atomic_connector_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_file *file_priv, struct drm_property *property, uint64_t val) { struct drm_device *dev = connector->dev; struct drm_mode_config *config = &dev->mode_config; bool replaced = false; int ret; if (property == config->prop_crtc_id) { struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); if (val && !crtc) { drm_dbg_atomic(dev, "[PROP:%d:%s] cannot find CRTC with ID %llu\n", property->base.id, property->name, val); return -EACCES; } return drm_atomic_set_crtc_for_connector(state, crtc); } else if (property == config->dpms_property) { /* setting DPMS property requires special handling, which * is done in legacy setprop path for us. Disallow (for * now?) atomic writes to DPMS property: */ drm_dbg_atomic(dev, "legacy [PROP:%d:%s] can only be set via legacy uAPI\n", property->base.id, property->name); return -EINVAL; } else if (property == config->tv_select_subconnector_property) { state->tv.select_subconnector = val; } else if (property == config->tv_subconnector_property) { state->tv.subconnector = val; } else if (property == config->tv_left_margin_property) { state->tv.margins.left = val; } else if (property == config->tv_right_margin_property) { state->tv.margins.right = val; } else if (property == config->tv_top_margin_property) { state->tv.margins.top = val; } else if (property == config->tv_bottom_margin_property) { state->tv.margins.bottom = val; } else if (property == config->legacy_tv_mode_property) { state->tv.legacy_mode = val; } else if (property == config->tv_mode_property) { state->tv.mode = val; } else if (property == config->tv_brightness_property) { state->tv.brightness = val; } else if (property == config->tv_contrast_property) { state->tv.contrast = val; } else if (property == config->tv_flicker_reduction_property) { state->tv.flicker_reduction = val; } else if (property == config->tv_overscan_property) { state->tv.overscan = val; } else if (property == config->tv_saturation_property) { state->tv.saturation = val; } else if (property == config->tv_hue_property) { state->tv.hue = val; } else if (property == config->link_status_property) { /* Never downgrade from GOOD to BAD on userspace's request here, * only hw issues can do that. * * For an atomic property the userspace doesn't need to be able * to understand all the properties, but needs to be able to * restore the state it wants on VT switch. 
So if the userspace * tries to change the link_status from GOOD to BAD, driver * silently rejects it and returns a 0. This prevents userspace * from accidentally breaking the display when it restores the * state. */ if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; } else if (property == config->hdr_output_metadata_property) { ret = drm_property_replace_blob_from_id(dev, &state->hdr_output_metadata, val, sizeof(struct hdr_output_metadata), -1, &replaced); return ret; } else if (property == config->aspect_ratio_property) { state->picture_aspect_ratio = val; } else if (property == config->content_type_property) { state->content_type = val; } else if (property == connector->scaling_mode_property) { state->scaling_mode = val; } else if (property == config->content_protection_property) { if (val == DRM_MODE_CONTENT_PROTECTION_ENABLED) { drm_dbg_kms(dev, "only drivers can set CP Enabled\n"); return -EINVAL; } state->content_protection = val; } else if (property == config->hdcp_content_type_property) { state->hdcp_content_type = val; } else if (property == connector->colorspace_property) { state->colorspace = val; } else if (property == config->writeback_fb_id_property) { struct drm_framebuffer *fb; int ret; fb = drm_framebuffer_lookup(dev, file_priv, val); ret = drm_atomic_set_writeback_fb_for_connector(state, fb); if (fb) drm_framebuffer_put(fb); return ret; } else if (property == config->writeback_out_fence_ptr_property) { s32 __user *fence_ptr = u64_to_user_ptr(val); return set_out_fence_for_connector(state->state, connector, fence_ptr); } else if (property == connector->max_bpc_property) { state->max_requested_bpc = val; } else if (property == connector->privacy_screen_sw_state_property) { state->privacy_screen_sw_state = val; } else if (connector->funcs->atomic_set_property) { return connector->funcs->atomic_set_property(connector, state, property, val); } else { drm_dbg_atomic(connector->dev, "[CONNECTOR:%d:%s] unknown property [PROP:%d:%s]\n", connector->base.id, connector->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_connector_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, uint64_t *val) { struct drm_device *dev = connector->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_crtc_id) { *val = (state->crtc) ? 
state->crtc->base.id : 0; } else if (property == config->dpms_property) { if (state->crtc && state->crtc->state->self_refresh_active) *val = DRM_MODE_DPMS_ON; else *val = connector->dpms; } else if (property == config->tv_select_subconnector_property) { *val = state->tv.select_subconnector; } else if (property == config->tv_subconnector_property) { *val = state->tv.subconnector; } else if (property == config->tv_left_margin_property) { *val = state->tv.margins.left; } else if (property == config->tv_right_margin_property) { *val = state->tv.margins.right; } else if (property == config->tv_top_margin_property) { *val = state->tv.margins.top; } else if (property == config->tv_bottom_margin_property) { *val = state->tv.margins.bottom; } else if (property == config->legacy_tv_mode_property) { *val = state->tv.legacy_mode; } else if (property == config->tv_mode_property) { *val = state->tv.mode; } else if (property == config->tv_brightness_property) { *val = state->tv.brightness; } else if (property == config->tv_contrast_property) { *val = state->tv.contrast; } else if (property == config->tv_flicker_reduction_property) { *val = state->tv.flicker_reduction; } else if (property == config->tv_overscan_property) { *val = state->tv.overscan; } else if (property == config->tv_saturation_property) { *val = state->tv.saturation; } else if (property == config->tv_hue_property) { *val = state->tv.hue; } else if (property == config->link_status_property) { *val = state->link_status; } else if (property == config->aspect_ratio_property) { *val = state->picture_aspect_ratio; } else if (property == config->content_type_property) { *val = state->content_type; } else if (property == connector->colorspace_property) { *val = state->colorspace; } else if (property == connector->scaling_mode_property) { *val = state->scaling_mode; } else if (property == config->hdr_output_metadata_property) { *val = state->hdr_output_metadata ? 
state->hdr_output_metadata->base.id : 0; } else if (property == config->content_protection_property) { *val = state->content_protection; } else if (property == config->hdcp_content_type_property) { *val = state->hdcp_content_type; } else if (property == config->writeback_fb_id_property) { /* Writeback framebuffer is one-shot, write and forget */ *val = 0; } else if (property == config->writeback_out_fence_ptr_property) { *val = 0; } else if (property == connector->max_bpc_property) { *val = state->max_requested_bpc; } else if (property == connector->privacy_screen_sw_state_property) { *val = state->privacy_screen_sw_state; } else if (connector->funcs->atomic_get_property) { return connector->funcs->atomic_get_property(connector, state, property, val); } else { drm_dbg_atomic(dev, "[CONNECTOR:%d:%s] unknown property [PROP:%d:%s]\n", connector->base.id, connector->name, property->base.id, property->name); return -EINVAL; } return 0; } int drm_atomic_get_property(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val) { struct drm_device *dev = property->dev; int ret; switch (obj->type) { case DRM_MODE_OBJECT_CONNECTOR: { struct drm_connector *connector = obj_to_connector(obj); WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); ret = drm_atomic_connector_get_property(connector, connector->state, property, val); break; } case DRM_MODE_OBJECT_CRTC: { struct drm_crtc *crtc = obj_to_crtc(obj); WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); ret = drm_atomic_crtc_get_property(crtc, crtc->state, property, val); break; } case DRM_MODE_OBJECT_PLANE: { struct drm_plane *plane = obj_to_plane(obj); WARN_ON(!drm_modeset_is_locked(&plane->mutex)); ret = drm_atomic_plane_get_property(plane, plane->state, property, val); break; } default: drm_dbg_atomic(dev, "[OBJECT:%d] has no properties\n", obj->id); ret = -EINVAL; break; } return ret; } /* * The big monster ioctl */ static struct drm_pending_vblank_event *create_vblank_event( struct drm_crtc *crtc, uint64_t user_data) { struct drm_pending_vblank_event *e = NULL; e = kzalloc(sizeof *e, GFP_KERNEL); if (!e) return NULL; e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.crtc_id = crtc->base.id; e->event.vbl.user_data = user_data; return e; } int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, struct drm_connector *connector, int mode) { struct drm_connector *tmp_connector; struct drm_connector_state *new_conn_state; struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; int i, ret, old_mode = connector->dpms; bool active = false; ret = drm_modeset_lock(&state->dev->mode_config.connection_mutex, state->acquire_ctx); if (ret) return ret; if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; connector->dpms = mode; crtc = connector->state->crtc; if (!crtc) goto out; ret = drm_atomic_add_affected_connectors(state, crtc); if (ret) goto out; crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto out; } for_each_new_connector_in_state(state, tmp_connector, new_conn_state, i) { if (new_conn_state->crtc != crtc) continue; if (tmp_connector->dpms == DRM_MODE_DPMS_ON) { active = true; break; } } crtc_state->active = active; ret = drm_atomic_commit(state); out: if (ret != 0) connector->dpms = old_mode; return ret; } static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value, struct drm_property *prop) { if (ret != 0 || old_val != prop_value) { drm_dbg_atomic(prop->dev, 
"[PROP:%d:%s] No prop can be changed during async flip\n", prop->base.id, prop->name); return -EINVAL; } return 0; } int drm_atomic_set_property(struct drm_atomic_state *state, struct drm_file *file_priv, struct drm_mode_object *obj, struct drm_property *prop, u64 prop_value, bool async_flip) { struct drm_mode_object *ref; u64 old_val; int ret; if (!drm_property_change_valid_get(prop, prop_value, &ref)) return -EINVAL; switch (obj->type) { case DRM_MODE_OBJECT_CONNECTOR: { struct drm_connector *connector = obj_to_connector(obj); struct drm_connector_state *connector_state; connector_state = drm_atomic_get_connector_state(state, connector); if (IS_ERR(connector_state)) { ret = PTR_ERR(connector_state); break; } if (async_flip) { ret = drm_atomic_connector_get_property(connector, connector_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } ret = drm_atomic_connector_set_property(connector, connector_state, file_priv, prop, prop_value); break; } case DRM_MODE_OBJECT_CRTC: { struct drm_crtc *crtc = obj_to_crtc(obj); struct drm_crtc_state *crtc_state; crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); break; } if (async_flip) { ret = drm_atomic_crtc_get_property(crtc, crtc_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } ret = drm_atomic_crtc_set_property(crtc, crtc_state, prop, prop_value); break; } case DRM_MODE_OBJECT_PLANE: { struct drm_plane *plane = obj_to_plane(obj); struct drm_plane_state *plane_state; struct drm_mode_config *config = &plane->dev->mode_config; plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { ret = PTR_ERR(plane_state); break; } if (async_flip && prop != config->prop_fb_id) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { drm_dbg_atomic(prop->dev, "[OBJECT:%d] Only primary planes can be changed during async flip\n", obj->id); ret = -EINVAL; break; } ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); break; } default: drm_dbg_atomic(prop->dev, "[OBJECT:%d] has no properties\n", obj->id); ret = -EINVAL; break; } drm_property_change_valid_put(prop, ref); return ret; } /** * DOC: explicit fencing properties * * Explicit fencing allows userspace to control the buffer synchronization * between devices. A Fence or a group of fences are transferred to/from * userspace using Sync File fds and there are two DRM properties for that. * IN_FENCE_FD on each DRM Plane to send fences to the kernel and * OUT_FENCE_PTR on each DRM CRTC to receive fences from the kernel. * * As a contrast, with implicit fencing the kernel keeps track of any * ongoing rendering, and automatically ensures that the atomic update waits * for any pending rendering to complete. This is usually tracked in &struct * dma_resv which can also contain mandatory kernel fences. Implicit syncing * is how Linux traditionally worked (e.g. DRI2/3 on X.org), whereas explicit * fencing is what Android wants. * * "IN_FENCE_FD”: * Use this property to pass a fence that DRM should wait on before * proceeding with the Atomic Commit request and show the framebuffer for * the plane on the screen. 
The fence can be either a normal fence or a * merged one, the sync_file framework will handle both cases and use a * fence_array if a merged fence is received. Passing -1 here means no * fences to wait on. * * If the Atomic Commit request has the DRM_MODE_ATOMIC_TEST_ONLY flag * it will only check if the Sync File is a valid one. * * On the driver side the fence is stored on the @fence parameter of * &struct drm_plane_state. Drivers which also support implicit fencing * should extract the implicit fence using drm_gem_plane_helper_prepare_fb(), * to make sure there's consistent behaviour between drivers in precedence * of implicit vs. explicit fencing. * * "OUT_FENCE_PTR”: * Use this property to pass a file descriptor pointer to DRM. Once the * Atomic Commit request call returns OUT_FENCE_PTR will be filled with * the file descriptor number of a Sync File. This Sync File contains the * CRTC fence that will be signaled when all framebuffers present on the * Atomic Commit * request for that given CRTC are scanned out on the * screen. * * The Atomic Commit request fails if a invalid pointer is passed. If the * Atomic Commit request fails for any other reason the out fence fd * returned will be -1. On a Atomic Commit with the * DRM_MODE_ATOMIC_TEST_ONLY flag the out fence will also be set to -1. * * Note that out-fences don't have a special interface to drivers and are * internally represented by a &struct drm_pending_vblank_event in struct * &drm_crtc_state, which is also used by the nonblocking atomic commit * helpers and for the DRM event handling for existing userspace. */ struct drm_out_fence_state { s32 __user *out_fence_ptr; struct sync_file *sync_file; int fd; }; static int setup_out_fence(struct drm_out_fence_state *fence_state, struct dma_fence *fence) { fence_state->fd = get_unused_fd_flags(O_CLOEXEC); if (fence_state->fd < 0) return fence_state->fd; if (put_user(fence_state->fd, fence_state->out_fence_ptr)) return -EFAULT; fence_state->sync_file = sync_file_create(fence); if (!fence_state->sync_file) return -ENOMEM; return 0; } static int prepare_signaling(struct drm_device *dev, struct drm_atomic_state *state, struct drm_mode_atomic *arg, struct drm_file *file_priv, struct drm_out_fence_state **fence_state, unsigned int *num_fences) { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; struct drm_connector *conn; struct drm_connector_state *conn_state; int i, c = 0, ret; if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) return 0; for_each_new_crtc_in_state(state, crtc, crtc_state, i) { s32 __user *fence_ptr; fence_ptr = get_out_fence_for_crtc(crtc_state->state, crtc); if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT || fence_ptr) { struct drm_pending_vblank_event *e; e = create_vblank_event(crtc, arg->user_data); if (!e) return -ENOMEM; crtc_state->event = e; } if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) { struct drm_pending_vblank_event *e = crtc_state->event; if (!file_priv) continue; ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); crtc_state->event = NULL; return ret; } } if (fence_ptr) { struct dma_fence *fence; struct drm_out_fence_state *f; f = krealloc(*fence_state, sizeof(**fence_state) * (*num_fences + 1), GFP_KERNEL); if (!f) return -ENOMEM; memset(&f[*num_fences], 0, sizeof(*f)); f[*num_fences].out_fence_ptr = fence_ptr; *fence_state = f; fence = drm_crtc_create_fence(crtc); if (!fence) return -ENOMEM; ret = setup_out_fence(&f[(*num_fences)++], fence); if (ret) { dma_fence_put(fence); return ret; } crtc_state->event->base.fence = 
fence; } c++; } for_each_new_connector_in_state(state, conn, conn_state, i) { struct drm_writeback_connector *wb_conn; struct drm_out_fence_state *f; struct dma_fence *fence; s32 __user *fence_ptr; if (!conn_state->writeback_job) continue; fence_ptr = get_out_fence_for_connector(state, conn); if (!fence_ptr) continue; f = krealloc(*fence_state, sizeof(**fence_state) * (*num_fences + 1), GFP_KERNEL); if (!f) return -ENOMEM; memset(&f[*num_fences], 0, sizeof(*f)); f[*num_fences].out_fence_ptr = fence_ptr; *fence_state = f; wb_conn = drm_connector_to_writeback(conn); fence = drm_writeback_get_out_fence(wb_conn); if (!fence) return -ENOMEM; ret = setup_out_fence(&f[(*num_fences)++], fence); if (ret) { dma_fence_put(fence); return ret; } conn_state->writeback_job->out_fence = fence; } /* * Having this flag means user mode pends on event which will never * reach due to lack of at least one CRTC for signaling */ if (c == 0 && (arg->flags & DRM_MODE_PAGE_FLIP_EVENT)) { drm_dbg_atomic(dev, "need at least one CRTC for DRM_MODE_PAGE_FLIP_EVENT"); return -EINVAL; } return 0; } static void complete_signaling(struct drm_device *dev, struct drm_atomic_state *state, struct drm_out_fence_state *fence_state, unsigned int num_fences, bool install_fds) { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; int i; if (install_fds) { for (i = 0; i < num_fences; i++) fd_install(fence_state[i].fd, fence_state[i].sync_file->file); kfree(fence_state); return; } for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct drm_pending_vblank_event *event = crtc_state->event; /* * Free the allocated event. drm_atomic_helper_setup_commit * can allocate an event too, so only free it if it's ours * to prevent a double free in drm_atomic_state_clear. */ if (event && (event->base.fence || event->base.file_priv)) { drm_event_cancel_free(dev, &event->base); crtc_state->event = NULL; } } if (!fence_state) return; for (i = 0; i < num_fences; i++) { if (fence_state[i].sync_file) fput(fence_state[i].sync_file->file); if (fence_state[i].fd >= 0) put_unused_fd(fence_state[i].fd); /* If this fails log error to the user */ if (fence_state[i].out_fence_ptr && put_user(-1, fence_state[i].out_fence_ptr)) drm_dbg_atomic(dev, "Couldn't clear out_fence_ptr\n"); } kfree(fence_state); } static void set_async_flip(struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; int i; for_each_new_crtc_in_state(state, crtc, crtc_state, i) { crtc_state->async_flip = true; } } int drm_mode_atomic_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_atomic *arg = data; uint32_t __user *objs_ptr = (uint32_t __user *)(unsigned long)(arg->objs_ptr); uint32_t __user *count_props_ptr = (uint32_t __user *)(unsigned long)(arg->count_props_ptr); uint32_t __user *props_ptr = (uint32_t __user *)(unsigned long)(arg->props_ptr); uint64_t __user *prop_values_ptr = (uint64_t __user *)(unsigned long)(arg->prop_values_ptr); unsigned int copied_objs, copied_props; struct drm_atomic_state *state; struct drm_modeset_acquire_ctx ctx; struct drm_out_fence_state *fence_state; int ret = 0; unsigned int i, j, num_fences; bool async_flip = false; /* disallow for drivers not supporting atomic: */ if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) return -EOPNOTSUPP; /* disallow for userspace that has not enabled atomic cap (even * though this may be a bit overkill, since legacy userspace * wouldn't know how to call this ioctl) */ if (!file_priv->atomic) { drm_dbg_atomic(dev, "commit failed: atomic cap 
not enabled\n"); return -EINVAL; } if (arg->flags & ~DRM_MODE_ATOMIC_FLAGS) { drm_dbg_atomic(dev, "commit failed: invalid flag\n"); return -EINVAL; } if (arg->reserved) { drm_dbg_atomic(dev, "commit failed: reserved field set\n"); return -EINVAL; } if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) { if (!dev->mode_config.async_page_flip) { drm_dbg_atomic(dev, "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported\n"); return -EINVAL; } async_flip = true; } /* can't test and expect an event at the same time. */ if ((arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) && (arg->flags & DRM_MODE_PAGE_FLIP_EVENT)) { drm_dbg_atomic(dev, "commit failed: page-flip event requested with test-only commit\n"); return -EINVAL; } state = drm_atomic_state_alloc(dev); if (!state) return -ENOMEM; drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); state->acquire_ctx = &ctx; state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET); retry: copied_objs = 0; copied_props = 0; fence_state = NULL; num_fences = 0; for (i = 0; i < arg->count_objs; i++) { uint32_t obj_id, count_props; struct drm_mode_object *obj; if (get_user(obj_id, objs_ptr + copied_objs)) { ret = -EFAULT; goto out; } obj = drm_mode_object_find(dev, file_priv, obj_id, DRM_MODE_OBJECT_ANY); if (!obj) { drm_dbg_atomic(dev, "cannot find object ID %d", obj_id); ret = -ENOENT; goto out; } if (!obj->properties) { drm_dbg_atomic(dev, "[OBJECT:%d] has no properties", obj_id); drm_mode_object_put(obj); ret = -ENOENT; goto out; } if (get_user(count_props, count_props_ptr + copied_objs)) { drm_mode_object_put(obj); ret = -EFAULT; goto out; } copied_objs++; for (j = 0; j < count_props; j++) { uint32_t prop_id; uint64_t prop_value; struct drm_property *prop; if (get_user(prop_id, props_ptr + copied_props)) { drm_mode_object_put(obj); ret = -EFAULT; goto out; } prop = drm_mode_obj_find_prop_id(obj, prop_id); if (!prop) { drm_dbg_atomic(dev, "[OBJECT:%d] cannot find property ID %d", obj_id, prop_id); drm_mode_object_put(obj); ret = -ENOENT; goto out; } if (copy_from_user(&prop_value, prop_values_ptr + copied_props, sizeof(prop_value))) { drm_mode_object_put(obj); ret = -EFAULT; goto out; } ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, async_flip); if (ret) { drm_mode_object_put(obj); goto out; } copied_props++; } drm_mode_object_put(obj); } ret = prepare_signaling(dev, state, arg, file_priv, &fence_state, &num_fences); if (ret) goto out; if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) set_async_flip(state); if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) { ret = drm_atomic_check_only(state); } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) { ret = drm_atomic_nonblocking_commit(state); } else { ret = drm_atomic_commit(state); } out: complete_signaling(dev, state, fence_state, num_fences, !ret); if (ret == -EDEADLK) { drm_atomic_state_clear(state); ret = drm_modeset_backoff(&ctx); if (!ret) goto retry; } drm_atomic_state_put(state); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; }
3 3 1 1 3 1 2 2 2 3 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 // SPDX-License-Identifier: GPL-2.0 /* * xfrm4_policy.c * * Changes: * Kazunori MIYAZAWA @USAGI * YOSHIFUJI Hideaki @USAGI * Split up af-specific portion * */ #include <linux/err.h> #include <linux/kernel.h> #include <linux/inetdevice.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/l3mdev.h> static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, u32 mark) { struct rtable *rt; memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; return ERR_CAST(rt); } static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, u32 mark) { struct flowi4 fl4; return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); } static int xfrm4_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark) { struct dst_entry *dst; struct flowi4 fl4; dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; saddr->a4 = fl4.saddr; dst_release(dst); return 0; } static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); /* Sheit... I remember I did this right. 
Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt.rt_is_input = rt->rt_is_input; xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_gw_family = rt->rt_gw_family; if (rt->rt_gw_family == AF_INET) xdst->u.rt.rt_gw4 = rt->rt_gw4; else if (rt->rt_gw_family == AF_INET6) xdst->u.rt.rt_gw6 = rt->rt_gw6; xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; rt_add_uncached_list(&xdst->u.rt); return 0; } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, .fill_dst = xfrm4_fill_dst, .blackhole_route = ipv4_blackhole_route, }; #ifdef CONFIG_SYSCTL static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { } }; static __net_init int xfrm4_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = xfrm4_policy_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); if (!table) goto err_alloc; table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; } hdr = register_net_sysctl_sz(net, "net/ipv4", table, ARRAY_SIZE(xfrm4_policy_table)); if (!hdr) goto err_reg; net->ipv4.xfrm4_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static __net_exit void xfrm4_net_sysctl_exit(struct net *net) { struct ctl_table *table; if (!net->ipv4.xfrm4_hdr) return; table = net->ipv4.xfrm4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); if (!net_eq(net, &init_net)) kfree(table); } #else /* CONFIG_SYSCTL */ static inline int xfrm4_net_sysctl_init(struct net *net) { return 0; } static inline void xfrm4_net_sysctl_exit(struct net *net) { } #endif static int __net_init xfrm4_net_init(struct net *net) { int ret; memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template, sizeof(xfrm4_dst_ops_template)); ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops); if (ret) return ret; ret = xfrm4_net_sysctl_init(net); if (ret) dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); return ret; } static void __net_exit xfrm4_net_exit(struct net *net) { xfrm4_net_sysctl_exit(net); dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); } static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = 
xfrm4_net_exit, }; static void __init xfrm4_policy_init(void) { xfrm_policy_register_afinfo(&xfrm4_policy_afinfo, AF_INET); } void __init xfrm4_init(void) { xfrm4_state_init(); xfrm4_policy_init(); xfrm4_protocol_init(); register_pernet_subsys(&xfrm4_net_ops); }
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * BlueZ - Bluetooth protocol stack for Linux
 *
 * Copyright (C) 2021 Intel Corporation
 */

#include <asm/unaligned.h>

void eir_create(struct hci_dev *hdev, u8 *data);

u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);

u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len);
u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
			   u8 data_len);

static inline u16 eir_precalc_len(u8 data_len)
{
	return sizeof(u8) * 2 + data_len;
}

static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
				  u8 *data, u8 data_len)
{
	eir[eir_len++] = sizeof(type) + data_len;
	eir[eir_len++] = type;
	memcpy(&eir[eir_len], data, data_len);
	eir_len += data_len;

	return eir_len;
}

static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
{
	eir[eir_len++] = sizeof(type) + sizeof(data);
	eir[eir_len++] = type;
	put_unaligned_le16(data, &eir[eir_len]);
	eir_len += sizeof(data);

	return eir_len;
}

static inline u16 eir_skb_put_data(struct sk_buff *skb, u8 type,
				   u8 *data, u8 data_len)
{
	u8 *eir;
	u16 eir_len;

	eir_len = eir_precalc_len(data_len);
	eir = skb_put(skb, eir_len);
	WARN_ON(sizeof(type) + data_len > U8_MAX);
	eir[0] = sizeof(type) + data_len;
	eir[1] = type;
	memcpy(&eir[2], data, data_len);

	return eir_len;
}

static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
				 size_t *data_len)
{
	size_t parsed = 0;

	if (eir_len < 2)
		return NULL;

	while (parsed < eir_len - 1) {
		u8 field_len = eir[0];

		if (field_len == 0)
			break;

		parsed += field_len + 1;

		if (parsed > eir_len)
			break;

		if (eir[1] != type) {
			eir += field_len + 1;
			continue;
		}

		/* Zero length data */
		if (field_len == 1)
			return NULL;

		if (data_len)
			*data_len = field_len - 1;

		return &eir[2];
	}

	return NULL;
}

void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len);
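/*
 * Illustrative sketch, not part of eir.h: how the inline helpers above emit
 * <length, type, payload> AD structures into a flat buffer. The AD type
 * values 0x01 (Flags) and 0x19 (Appearance) are the standard Bluetooth
 * assigned numbers, hard-coded here only for the example; the kernel has
 * its own named constants for them. buf must have room for 7 bytes.
 */
static inline u16 eir_example_build(u8 *buf)
{
	u8 flags = 0x06; /* LE General Discoverable | BR/EDR Not Supported */
	u16 eir_len = 0;

	/* 3 bytes: length (0x02), type (0x01), one byte of flags. */
	eir_len = eir_append_data(buf, eir_len, 0x01, &flags, sizeof(flags));

	/* 4 bytes: length (0x03), type (0x19), 16-bit appearance, LE order. */
	eir_len = eir_append_le16(buf, eir_len, 0x19, 0x0040);

	return eir_len; /* 7 bytes total */
}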
26 17 17 14 26 26 26 25 20 14 14 14 3 26 26 26 3 3 26 145 99 146 49 146 177 175 112 112 112 112 8 8 8 8 1 8 1 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 // SPDX-License-Identifier: GPL-2.0 #include <linux/tcp.h> #include <net/tcp.h> static u32 tcp_rack_reo_wnd(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); if (!tp->reord_seen) { /* If reordering has not been observed, be aggressive during * the recovery or starting the recovery by DUPACK threshold. */ if (inet_csk(sk)->icsk_ca_state >= TCP_CA_Recovery) return 0; if (tp->sacked_out >= tp->reordering && !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & TCP_RACK_NO_DUPTHRESH)) return 0; } /* To be more reordering resilient, allow min_rtt/4 settling delay. * Use min_rtt instead of the smoothed RTT because reordering is * often a path property and less related to queuing or delayed ACKs. * Upon receiving DSACKs, linearly increase the window up to the * smoothed RTT. */ return min((tcp_min_rtt(tp) >> 2) * tp->rack.reo_wnd_steps, tp->srtt_us >> 3); } s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd) { return tp->rack.rtt_us + reo_wnd - tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb)); } /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01): * * Marks a packet lost, if some packet sent later has been (s)acked. * The underlying idea is similar to the traditional dupthresh and FACK * but they look at different metrics: * * dupthresh: 3 OOO packets delivered (packet count) * FACK: sequence delta to highest sacked sequence (sequence space) * RACK: sent time delta to the latest delivered packet (time domain) * * The advantage of RACK is it applies to both original and retransmitted * packet and therefore is robust against tail losses. Another advantage * is being more resilient to reordering by simply allowing some * "settling delay", instead of tweaking the dupthresh. * * When tcp_rack_detect_loss() detects some packets are lost and we * are not already in the CA_Recovery state, either tcp_rack_reo_timeout() * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will * make us enter the CA_Recovery state. 
*/ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb, *n; u32 reo_wnd; *reo_timeout = 0; reo_wnd = tcp_rack_reo_wnd(sk); list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); s32 remaining; /* Skip ones marked lost but not yet retransmitted */ if ((scb->sacked & TCPCB_LOST) && !(scb->sacked & TCPCB_SACKED_RETRANS)) continue; if (!tcp_skb_sent_after(tp->rack.mstamp, tcp_skb_timestamp_us(skb), tp->rack.end_seq, scb->end_seq)) break; /* A packet is lost if it has not been s/acked beyond * the recent RTT plus the reordering window. */ remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd); if (remaining <= 0) { tcp_mark_skb_lost(sk, skb); list_del_init(&skb->tcp_tsorted_anchor); } else { /* Record maximum wait time */ *reo_timeout = max_t(u32, *reo_timeout, remaining); } } } bool tcp_rack_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout; if (!tp->rack.advanced) return false; /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; tcp_rack_detect_loss(sk, &timeout); if (timeout) { timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US); inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } return !!timeout; } /* Record the most recently (re)sent time among the (s)acked packets * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from * draft-cheng-tcpm-rack-00.txt */ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time) { u32 rtt_us; rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior * retransmission) was sacked. * * If the original is lost, there is no ambiguity. Otherwise * we assume the original can be delayed up to aRTT + min_rtt. * the aRTT term is bounded by the fast recovery or timeout, * so it's at least one RTT (i.e., retransmission is at least * an RTT later). */ return; } tp->rack.advanced = 1; tp->rack.rtt_us = rtt_us; if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, end_seq, tp->rack.end_seq)) { tp->rack.mstamp = xmit_time; tp->rack.end_seq = end_seq; } } /* We have waited long enough to accommodate reordering. Mark the expired * packets lost and retransmit them. */ void tcp_rack_reo_timeout(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout, prior_inflight; u32 lost = tp->lost; prior_inflight = tcp_packets_in_flight(tp); tcp_rack_detect_loss(sk, &timeout); if (prior_inflight != tcp_packets_in_flight(tp)) { if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) { tcp_enter_recovery(sk, false); if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_cwnd_reduction(sk, 1, tp->lost - lost, 0); } tcp_xmit_retransmit_queue(sk); } if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) tcp_rearm_rto(sk); } /* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. * * If a DSACK is received that seems like it may have been due to reordering * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded * by srtt), since there is possibility that spurious retransmission was * due to reordering delay longer than reo_wnd. * * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) * no. of successful recoveries (accounts for full DSACK-based loss * recovery undo). After that, reset it to default (min_rtt/4). 
* * At max, reo_wnd is incremented only once per rtt. So that the new * DSACK on which we are reacting, is due to the spurious retx (approx) * after the reo_wnd has been updated last time. * * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than * absolute value to account for change in rtt. */ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ if (before(rs->prior_delivered, tp->rack.last_delivered)) tp->rack.dsack_seen = 0; /* Adjust the reo_wnd if update is pending */ if (tp->rack.dsack_seen) { tp->rack.reo_wnd_steps = min_t(u32, 0xFF, tp->rack.reo_wnd_steps + 1); tp->rack.dsack_seen = 0; tp->rack.last_delivered = tp->delivered; tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; } else if (!tp->rack.reo_wnd_persist) { tp->rack.reo_wnd_steps = 1; } } /* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits * the next unacked packet upon receiving * a) three or more DUPACKs to start the fast recovery * b) an ACK acknowledging new data during the fast recovery. */ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced) { const u8 state = inet_csk(sk)->icsk_ca_state; struct tcp_sock *tp = tcp_sk(sk); if ((state < TCP_CA_Recovery && tp->sacked_out >= tp->reordering) || (state == TCP_CA_Recovery && snd_una_advanced)) { struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 mss; if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) return; mss = tcp_skb_mss(skb); if (tcp_skb_pcount(skb) > 1 && skb->len > mss) tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, mss, mss, GFP_ATOMIC); tcp_mark_skb_lost(sk, skb); } }
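To make the window arithmetic above easier to follow, here is a minimal standalone C sketch, assuming plain microsecond values; the helper names (rack_reo_wnd, rack_skb_timeout) and the sample numbers are illustrative only, not kernel API. It mirrors the min((min_rtt/4) * steps, srtt) cap in tcp_rack_reo_wnd() and the "RTT + reo_wnd - elapsed" test in tcp_rack_skb_timeout().

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: plain microsecond values.  The kernel keeps
 * tp->srtt_us scaled by 8, which is why tcp_rack_reo_wnd() shifts it
 * right by 3 before comparing.
 */
static uint32_t rack_reo_wnd(uint32_t min_rtt_us, uint32_t srtt_us,
			     uint32_t reo_wnd_steps)
{
	uint32_t wnd = (min_rtt_us >> 2) * reo_wnd_steps;

	return wnd < srtt_us ? wnd : srtt_us;	/* grows per DSACK, capped at srtt */
}

/* Positive result: keep waiting this many usecs; <= 0: mark the skb lost. */
static int64_t rack_skb_timeout(uint32_t rack_rtt_us, uint32_t reo_wnd_us,
				uint64_t now_us, uint64_t xmit_time_us)
{
	return (int64_t)rack_rtt_us + reo_wnd_us -
	       (int64_t)(now_us - xmit_time_us);
}

int main(void)
{
	uint32_t reo = rack_reo_wnd(20000, 30000, 1);	/* 5000 us */
	int64_t wait = rack_skb_timeout(21000, reo, 1000000, 970000);

	printf("reo_wnd=%u us, remaining=%lld us -> %s\n", reo,
	       (long long)wait, wait <= 0 ? "lost" : "still waiting");
	return 0;
}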
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Kernel-based Virtual Machine driver for Linux * * This module enables machines with Intel VT-x extensions to run virtual * machines without emulation or binary translation. * * MMU support * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. * * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> */ /* * The MMU needs to be able to access/walk 32-bit and 64-bit guest page tables, * as well as guest EPT tables, so the code in this file is compiled thrice, * once per guest PTE type. The per-type defines are #undef'd at the end. */ #if PTTYPE == 64 #define pt_element_t u64 #define guest_walker guest_walker64 #define FNAME(name) paging##64_##name #define PT_LEVEL_BITS 9 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 #define pt_element_t u32 #define guest_walker guest_walker32 #define FNAME(name) paging##32_##name #define PT_LEVEL_BITS 10 #define PT_MAX_FULL_LEVELS 2 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #define PT32_DIR_PSE36_SIZE 4 #define PT32_DIR_PSE36_SHIFT 13 #define PT32_DIR_PSE36_MASK \ (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT #define FNAME(name) ept_##name #define PT_LEVEL_BITS 9 #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) (!(mmu)->cpu_role.base.ad_disabled) #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value #endif /* Common logic, but per-type values. These also need to be undefined. */ #define PT_BASE_ADDR_MASK ((pt_element_t)__PT_BASE_ADDR_MASK) #define PT_LVL_ADDR_MASK(lvl) __PT_LVL_ADDR_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS) #define PT_LVL_OFFSET_MASK(lvl) __PT_LVL_OFFSET_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS) #define PT_INDEX(addr, lvl) __PT_INDEX(addr, lvl, PT_LEVEL_BITS) #define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT) #define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K) /* * The guest_walker structure emulates the behavior of the hardware page * table walker.
*/ struct guest_walker { int level; unsigned max_level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t ptes[PT_MAX_FULL_LEVELS]; pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned int pt_access[PT_MAX_FULL_LEVELS]; unsigned int pte_access; gfn_t gfn; struct x86_exception fault; }; #if PTTYPE == 32 static inline gfn_t pse36_gfn_delta(u32 gpte) { int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT; return (gpte & PT32_DIR_PSE36_MASK) << shift; } #endif static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) { return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access, unsigned gpte) { unsigned mask; /* dirty bit is not supported, so no need to track it */ if (!PT_HAVE_ACCESSED_DIRTY(mmu)) return; BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); mask = (unsigned)~ACC_WRITE_MASK; /* Allow write access to dirty gptes */ mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; *access &= mask; } static inline int FNAME(is_present_gpte)(unsigned long pte) { #if PTTYPE != PTTYPE_EPT return pte & PT_PRESENT_MASK; #else return pte & 7; #endif } static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte) { #if PTTYPE != PTTYPE_EPT return false; #else return __is_bad_mt_xwr(rsvd_check, gpte); #endif } static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) { return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) || FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { if (!FNAME(is_present_gpte)(gpte)) goto no_present; /* Prefetch only accessed entries (unless A/D bits are disabled). */ if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K)) goto no_present; return false; no_present: drop_spte(vcpu->kvm, spte); return true; } /* * For PTTYPE_EPT, a page table can be executable but not readable * on supported processors. Therefore, set_spte does not automatically * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK * to signify readability since it isn't used in the EPT case */ static inline unsigned FNAME(gpte_access)(u64 gpte) { unsigned access; #if PTTYPE == PTTYPE_EPT access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0); #else BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); BUILD_BUG_ON(ACC_EXEC_MASK != 1); access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. 
*/ access ^= (gpte >> PT64_NX_SHIFT); #endif return access; } static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; pt_element_t __user *ptep_user; gfn_t table_gfn; int ret; /* dirty/accessed bits are not supported, so no need to update them */ if (!PT_HAVE_ACCESSED_DIRTY(mmu)) return 0; for (level = walker->max_level; level >= walker->level; --level) { pte = orig_pte = walker->ptes[level - 1]; table_gfn = walker->table_gfn[level - 1]; ptep_user = walker->ptep_user[level - 1]; index = offset_in_page(ptep_user) / sizeof(pt_element_t); if (!(pte & PT_GUEST_ACCESSED_MASK)) { trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); pte |= PT_GUEST_ACCESSED_MASK; } if (level == walker->level && write_fault && !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT if (kvm_x86_ops.nested_ops->write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; } if (pte == orig_pte) continue; /* * If the slot is read-only, simply do not process the accessed * and dirty bits. This is the correct thing to do if the slot * is ROM, and page tables in read-as-ROM/write-as-MMIO slots * are only supported if the accessed and dirty bits are already * set in the ROM (so that MMIO writes are never needed). * * Note that NPT does not allow this at all and faults, since * it always wants nested page table entries for the guest * page tables to be writable. And EPT works but will simply * overwrite the read-only memory to set the accessed and dirty * bits. */ if (unlikely(!walker->pte_writable[level - 1])) continue; ret = __try_cmpxchg_user(ptep_user, &orig_pte, pte, fault); if (ret) return ret; kvm_vcpu_mark_page_dirty(vcpu, table_gfn); walker->ptes[level - 1] = pte; } return 0; } static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) { unsigned pkeys = 0; #if PTTYPE == 64 pte_t pte = {.pte = gpte}; pkeys = pte_flags_pkey(pte_flags(pte)); #endif return pkeys; } static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu, unsigned int level, unsigned int gpte) { /* * For EPT and PAE paging (both variants), bit 7 is either reserved at * all level or indicates a huge page (ignoring CR3/EPTP). In either * case, bit 7 being set terminates the walk. */ #if PTTYPE == 32 /* * 32-bit paging requires special handling because bit 7 is ignored if * CR4.PSE=0, not reserved. Clear bit 7 in the gpte if the level is * greater than the last level for which bit 7 is the PAGE_SIZE bit. * * The RHS has bit 7 set iff level < (2 + PSE). If it is clear, bit 7 * is not reserved and does not indicate a large page at this level, * so clear PT_PAGE_SIZE_MASK in gpte if that is the case. */ gpte &= level - (PT32_ROOT_LEVEL + mmu->cpu_role.ext.cr4_pse); #endif /* * PG_LEVEL_4K always terminates. The RHS has bit 7 set * iff level <= PG_LEVEL_4K, which for our purpose means * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then. */ gpte |= level - PG_LEVEL_4K - 1; return gpte & PT_PAGE_SIZE_MASK; } /* * Fetch a guest pte for a guest virtual address, or for an L2's GPA. 
*/ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t addr, u64 access) { int ret; pt_element_t pte; pt_element_t __user *ptep_user; gfn_t table_gfn; u64 pt_access, pte_access; unsigned index, accessed_dirty, pte_pkey; u64 nested_access; gpa_t pte_gpa; bool have_ad; int offset; u64 walk_nx_mask = 0; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; const int fetch_fault = access & PFERR_FETCH_MASK; u16 errcode = 0; gpa_t real_gpa; gfn_t gfn; trace_kvm_mmu_pagetable_walk(addr, access); retry_walk: walker->level = mmu->cpu_role.base.level; pte = kvm_mmu_get_guest_pgd(vcpu, mmu); have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); #if PTTYPE == 64 walk_nx_mask = 1ULL << PT64_NX_SHIFT; if (walker->level == PT32E_ROOT_LEVEL) { pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); if (!FNAME(is_present_gpte)(pte)) goto error; --walker->level; } #endif walker->max_level = walker->level; /* * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging * by the MOV to CR instruction are treated as reads and do not cause the * processor to set the dirty flag in any EPT paging-structure entry. */ nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; pte_access = ~0; /* * Queue a page fault for injection if this assertion fails, as callers * assume that walker.fault contains sane info on a walk failure. I.e. * avoid making the situation worse by inducing even worse badness * between when the assertion fails and when KVM kicks the vCPU out to * userspace (because the VM is bugged). */ if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm)) goto error; ++walker->level; do { struct kvm_memory_slot *slot; unsigned long host_addr; pt_access = pte_access; --walker->level; index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); offset = index * sizeof(pt_element_t); pte_gpa = gfn_to_gpa(table_gfn) + offset; BUG_ON(walker->level < 1); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn), nested_access, &walker->fault); /* * FIXME: This can happen if emulation (for of an INS/OUTS * instruction) triggers a nested page fault. The exit * qualification / exit info field will incorrectly have * "guest page access" as the nested page fault's cause, * instead of "guest page structure access". To fix this, * the x86_exception struct should be augmented with enough * information to fix the exit_qualification or exit_info_1 * fields. */ if (unlikely(real_gpa == INVALID_GPA)) return 0; slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa)); if (!kvm_is_visible_memslot(slot)) goto error; host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa), &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; ptep_user = (pt_element_t __user *)((void *)host_addr + offset); if (unlikely(__get_user(pte, ptep_user))) goto error; walker->ptep_user[walker->level - 1] = ptep_user; trace_kvm_mmu_paging_element(pte, walker->level); /* * Inverting the NX it lets us AND it like other * permission bits. 
*/ pte_access = pt_access & (pte ^ walk_nx_mask); if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) { errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } walker->ptes[walker->level - 1] = pte; /* Convert to ACC_*_MASK flags for struct guest_walker. */ walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask); } while (!FNAME(is_last_gpte)(mmu, walker->level, pte)); pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; /* Convert to ACC_*_MASK flags for struct guest_walker. */ walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); if (unlikely(errcode)) goto error; gfn = gpte_to_gfn_lvl(pte, walker->level); gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; #if PTTYPE == 32 if (walker->level > PG_LEVEL_4K && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); #endif real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn), access, &walker->fault); if (real_gpa == INVALID_GPA) return 0; walker->gfn = real_gpa >> PAGE_SHIFT; if (!write_fault) FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte); else /* * On a write fault, fold the dirty bit into accessed_dirty. * For modes without A/D bits support accessed_dirty will be * always clear. */ accessed_dirty &= pte >> (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) goto retry_walk; } return 1; error: errcode |= write_fault | user_fault; if (fetch_fault && (is_efer_nx(mmu) || is_cr4_smep(mmu))) errcode |= PFERR_FETCH_MASK; walker->fault.vector = PF_VECTOR; walker->fault.error_code_valid = true; walker->fault.error_code = errcode; #if PTTYPE == PTTYPE_EPT /* * Use PFERR_RSVD_MASK in error_code to tell if EPT * misconfiguration requires to be injected. The detection is * done by is_rsvd_bits_set() above. * * We set up the value of exit_qualification to inject: * [2:0] - Derive from the access bits. The exit_qualification might be * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables * [7:8] - Derived from [7:8] of real exit_qualification * * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { vcpu->arch.exit_qualification &= (EPT_VIOLATION_GVA_IS_VALID | EPT_VIOLATION_GVA_TRANSLATED); if (write_fault) vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; if (user_fault) vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; if (fetch_fault) vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; /* * Note, pte_access holds the raw RWX bits from the EPTE, not * ACC_*_MASK flags! 
*/ vcpu->arch.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) << EPT_VIOLATION_RWX_SHIFT; } #endif walker->fault.address = addr; walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; walker->fault.async_page_fault = false; trace_kvm_mmu_walker_error(walker->fault.error_code); return 0; } static int FNAME(walk_addr)(struct guest_walker *walker, struct kvm_vcpu *vcpu, gpa_t addr, u64 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); } static bool FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, pt_element_t gpte) { struct kvm_memory_slot *slot; unsigned pte_access; gfn_t gfn; kvm_pfn_t pfn; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) return false; gfn = gpte_to_gfn(gpte); pte_access = sp->role.access & FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, pte_access & ACC_WRITE_MASK); if (!slot) return false; pfn = gfn_to_pfn_memslot_atomic(slot, gfn); if (is_error_pfn(pfn)) return false; mmu_set_spte(vcpu, slot, spte, pte_access, gfn, pfn, NULL); kvm_release_pfn_clean(pfn); return true; } static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, struct guest_walker *gw, int level) { pt_element_t curr_pte; gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1]; u64 mask; int r, index; if (level == PG_LEVEL_4K) { mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; base_gpa = pte_gpa & ~mask; index = (pte_gpa - base_gpa) / sizeof(pt_element_t); r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa, gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); curr_pte = gw->prefetch_ptes[index]; } else r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &curr_pte, sizeof(curr_pte)); return r || curr_pte != gw->ptes[level - 1]; } static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, u64 *sptep) { struct kvm_mmu_page *sp; pt_element_t *gptep = gw->prefetch_ptes; u64 *spte; int i; sp = sptep_to_sp(sptep); if (sp->role.level > PG_LEVEL_4K) return; /* * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; if (sp->role.direct) return __direct_pte_prefetch(vcpu, sp, sptep); i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1); spte = sp->spt + i; for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { if (spte == sptep) continue; if (is_shadow_present_pte(*spte)) continue; if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i])) break; } } /* * Fetch a shadow pte for a specific level in the paging hierarchy. * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. */ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, struct guest_walker *gw) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned int direct_access, access; int top_level, ret; gfn_t base_gfn = fault->gfn; WARN_ON_ONCE(gw->gfn != base_gfn); direct_access = gw->pte_access; top_level = vcpu->arch.mmu->cpu_role.base.level; if (top_level == PT32E_ROOT_LEVEL) top_level = PT32_ROOT_LEVEL; /* * Verify that the top-level gpte is still there. Since the page * is a root page, it is either write protected (and cannot be * changed from now on) or it is invalid (in which case, we don't * really care if it changes underneath us after this point). 
*/ if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) goto out_gpte_changed; /* * Load a new root and retry the faulting instruction in the extremely * unlikely scenario that the guest root gfn became visible between * loading a dummy root and handling the resulting page fault, e.g. if * userspace create a memslot in the interim. */ if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) { kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu); goto out_gpte_changed; } for_each_shadow_entry(vcpu, fault->addr, it) { gfn_t table_gfn; clear_sp_write_flooding_count(it.sptep); if (it.level == gw->level) break; table_gfn = gw->table_gfn[it.level - 2]; access = gw->pt_access[it.level - 2]; sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn, false, access); if (sp != ERR_PTR(-EEXIST)) { /* * We must synchronize the pagetable before linking it * because the guest doesn't need to flush tlb when * the gpte is changed from non-present to present. * Otherwise, the guest may use the wrong mapping. * * For PG_LEVEL_4K, kvm_mmu_get_page() has already * synchronized it transiently via kvm_sync_page(). * * For higher level pagetable, we synchronize it via * the slower mmu_sync_children(). If it needs to * break, some progress has been made; return * RET_PF_RETRY and retry on the next #PF. * KVM_REQ_MMU_SYNC is not necessary but it * expedites the process. */ if (sp->unsync_children && mmu_sync_children(vcpu, sp, false)) return RET_PF_RETRY; } /* * Verify that the gpte in the page we've just write * protected is still there. */ if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) goto out_gpte_changed; if (sp != ERR_PTR(-EEXIST)) link_shadow_page(vcpu, it.sptep, sp); if (fault->write && table_gfn == fault->gfn) fault->write_fault_to_shadow_pgtable = true; } /* * Adjust the hugepage size _after_ resolving indirect shadow pages. * KVM doesn't support mapping hugepages into the guest for gfns that * are being shadowed by KVM, i.e. allocating a new shadow page may * affect the allowed hugepage size. */ kvm_mmu_hugepage_adjust(vcpu, fault); trace_kvm_mmu_spte_requested(fault); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { /* * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. */ if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; validate_direct_spte(vcpu, it.sptep, direct_access); sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, direct_access); if (sp == ERR_PTR(-EEXIST)) continue; link_shadow_page(vcpu, it.sptep, sp); if (fault->huge_page_disallowed) account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) return -EFAULT; ret = mmu_set_spte(vcpu, fault->slot, it.sptep, gw->pte_access, base_gfn, fault->pfn, fault); if (ret == RET_PF_SPURIOUS) return ret; FNAME(pte_prefetch)(vcpu, gw, it.sptep); return ret; out_gpte_changed: return RET_PF_RETRY; } /* * Page fault handler. 
There are several causes for a page fault: * - there is no shadow pte for the guest pte * - write access through a shadow pte marked read only so that we can set * the dirty bit * - write access to a shadow pte marked read only so we can update the page * dirty bitmap, when userspace requests it * - mmio access; in this case we will never install a present shadow pte * - normal guest page fault due to the guest pte marked not present, not * writable, or not executable * * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. */ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct guest_walker walker; int r; WARN_ON_ONCE(fault->is_tdp); /* * Look up the guest pte for the faulting address. * If PFEC.RSVD is set, this is a shadow page fault. * The bit needs to be cleared before walking guest page tables. */ r = FNAME(walk_addr)(&walker, vcpu, fault->addr, fault->error_code & ~PFERR_RSVD_MASK); /* * The page is not mapped by the guest. Let the guest handle it. */ if (!r) { if (!fault->prefetch) kvm_inject_emulated_page_fault(vcpu, &walker.fault); return RET_PF_RETRY; } fault->gfn = walker.gfn; fault->max_level = walker.level; fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); if (page_fault_handle_page_track(vcpu, fault)) { shadow_page_table_clear_flood(vcpu, fault->addr); return RET_PF_EMULATE; } r = mmu_topup_memory_caches(vcpu, true); if (r) return r; r = kvm_faultin_pfn(vcpu, fault, walker.pte_access); if (r != RET_PF_CONTINUE) return r; /* * Do not change pte_access if the pfn is a mmio page, otherwise * we will cache the incorrect access into mmio spte. */ if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) && !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) { walker.pte_access |= ACC_WRITE_MASK; walker.pte_access &= ~ACC_USER_MASK; /* * If we converted a user page to a kernel page, * so that the kernel can write to it when cr0.wp=0, * then we should prevent the kernel from executing it * if SMEP is enabled. */ if (is_cr4_smep(vcpu->arch.mmu)) walker.pte_access &= ~ACC_EXEC_MASK; } r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) goto out_unlock; r = FNAME(fetch)(vcpu, fault, &walker); out_unlock: write_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(fault->pfn); return r; } static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) { int offset = 0; WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K); if (PTTYPE == 32) offset = sp->role.quadrant << SPTE_LEVEL_BITS; return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); } /* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t addr, u64 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = INVALID_GPA; int r; #ifndef CONFIG_X86_64 /* A 64-bit GVA should be impossible on 32-bit KVM. */ WARN_ON_ONCE((addr >> 32) && mmu == vcpu->arch.walk_mmu); #endif r = FNAME(walk_addr_generic)(&walker, vcpu, mmu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; return gpa; } /* * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is * safe because: * - The spte has a reference to the struct page, so the pfn for a given gfn * can't change unless all sptes pointing to it are nuked first. 
* * Returns * < 0: failed to sync spte * 0: the spte is synced and no tlb flushing is required * > 0: the spte is synced and tlb flushing is required */ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i) { bool host_writable; gpa_t first_pte_gpa; u64 *sptep, spte; struct kvm_memory_slot *slot; unsigned pte_access; pt_element_t gpte; gpa_t pte_gpa; gfn_t gfn; if (WARN_ON_ONCE(!sp->spt[i])) return 0; first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, sizeof(pt_element_t))) return -1; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) return 1; gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; pte_access &= FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access)) return 0; /* * Drop the SPTE if the new protections would result in a RWX=0 * SPTE or if the gfn is changing. The RWX=0 case only affects * EPT with execute-only support, i.e. EPT without an effective * "present" bit, as all other paging modes will create a * read-only SPTE if pte_access is zero. */ if ((!pte_access && !shadow_present_mask) || gfn != kvm_mmu_page_get_gfn(sp, i)) { drop_spte(vcpu->kvm, &sp->spt[i]); return 1; } /* * Do nothing if the permissions are unchanged. The existing SPTE is * still valid, and prefetch_invalid_gpte() has verified that the A/D bits * are set in the "new" gPTE, i.e. there is no danger of missing an A/D * update due to A/D bits being set in the SPTE but not the gPTE. */ if (kvm_mmu_page_get_access(sp, i) == pte_access) return 0; /* Update the shadowed access bits in case they changed. */ kvm_mmu_page_set_access(sp, i, pte_access); sptep = &sp->spt[i]; spte = *sptep; host_writable = spte & shadow_host_writable_mask; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); make_spte(vcpu, sp, slot, pte_access, gfn, spte_to_pfn(spte), spte, true, false, host_writable, &spte); return mmu_spte_update(sptep, spte); } #undef pt_element_t #undef guest_walker #undef FNAME #undef PT_BASE_ADDR_MASK #undef PT_INDEX #undef PT_LVL_ADDR_MASK #undef PT_LVL_OFFSET_MASK #undef PT_LEVEL_BITS #undef PT_MAX_FULL_LEVELS #undef gpte_to_gfn #undef gpte_to_gfn_lvl #undef PT_GUEST_ACCESSED_MASK #undef PT_GUEST_DIRTY_MASK #undef PT_GUEST_DIRTY_SHIFT #undef PT_GUEST_ACCESSED_SHIFT #undef PT_HAVE_ACCESSED_DIRTY
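As a rough, self-contained illustration of the index math the walker loop relies on (PT_INDEX() with PT_LEVEL_BITS = 9 for the 64-bit and EPT formats), the sketch below derives the per-level table index for a made-up guest address. PAGE_SHIFT, LEVEL_BITS, pt_index() and the sample address are assumptions for demonstration, not the KVM definitions.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB pages */
#define LEVEL_BITS	9	/* 512 entries per 64-bit page table */

/* Index selected in the table at 'level' (1 = 4K leaf) for 'addr'. */
static unsigned int pt_index(uint64_t addr, int level)
{
	return (addr >> (PAGE_SHIFT + (level - 1) * LEVEL_BITS)) &
	       ((1u << LEVEL_BITS) - 1);
}

int main(void)
{
	uint64_t gva = 0x00007f1234567000ULL;	/* hypothetical guest address */

	/* Same direction as the do/while in walk_addr_generic(): start at
	 * the root level and step down until the leaf is reached. */
	for (int level = 4; level >= 1; level--)
		printf("level %d -> index %u\n", level, pt_index(gva, level));
	return 0;
}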
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/ #ifndef __ND_H__ #define __ND_H__ #include <linux/libnvdimm.h> #include <linux/badblocks.h> #include <linux/blkdev.h> #include <linux/device.h> #include <linux/mutex.h> #include <linux/ndctl.h> #include <linux/types.h> #include <linux/nd.h> #include "label.h" enum { /* * Limits the maximum number of block apertures a dimm can * support and is an input to the geometry/on-disk-format of a * BTT instance */ ND_MAX_LANES = 256, INT_LBASIZE_ALIGNMENT = 64, NVDIMM_IO_ATOMIC = 1, }; struct nvdimm_drvdata { struct device *dev; int nslabel_size; struct nd_cmd_get_config_size nsarea; void *data; bool cxl; int ns_current, ns_next; struct resource dpa; struct kref kref; }; static inline const u8 *nsl_ref_name(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return nd_label->cxl.name; return nd_label->efi.name; } static inline u8 *nsl_get_name(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u8 *name) { if (ndd->cxl) return memcpy(name, nd_label->cxl.name, NSLABEL_NAME_LEN); return memcpy(name, nd_label->efi.name, NSLABEL_NAME_LEN); } static inline u8 *nsl_set_name(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u8 *name) { if (!name) return NULL; if (ndd->cxl) return memcpy(nd_label->cxl.name, name, NSLABEL_NAME_LEN); return memcpy(nd_label->efi.name, name, NSLABEL_NAME_LEN); } static inline u32 nsl_get_slot(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le32_to_cpu(nd_label->cxl.slot); return __le32_to_cpu(nd_label->efi.slot); } static inline void nsl_set_slot(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u32 slot) { if (ndd->cxl) nd_label->cxl.slot = __cpu_to_le32(slot); else nd_label->efi.slot = __cpu_to_le32(slot); } static inline u64 nsl_get_checksum(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le64_to_cpu(nd_label->cxl.checksum); return __le64_to_cpu(nd_label->efi.checksum); } static inline void nsl_set_checksum(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u64 checksum) { if (ndd->cxl) nd_label->cxl.checksum = __cpu_to_le64(checksum); else nd_label->efi.checksum = __cpu_to_le64(checksum); } static inline u32 nsl_get_flags(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le32_to_cpu(nd_label->cxl.flags); return __le32_to_cpu(nd_label->efi.flags); } static inline void nsl_set_flags(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u32 flags) { if (ndd->cxl) nd_label->cxl.flags = __cpu_to_le32(flags); else nd_label->efi.flags = __cpu_to_le32(flags); } static inline u64 nsl_get_dpa(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le64_to_cpu(nd_label->cxl.dpa); return __le64_to_cpu(nd_label->efi.dpa); } static inline void nsl_set_dpa(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u64 dpa) { if (ndd->cxl) nd_label->cxl.dpa = __cpu_to_le64(dpa); else nd_label->efi.dpa = __cpu_to_le64(dpa); } static inline u64 nsl_get_rawsize(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le64_to_cpu(nd_label->cxl.rawsize); return __le64_to_cpu(nd_label->efi.rawsize); } static inline void nsl_set_rawsize(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u64 rawsize) { if (ndd->cxl) nd_label->cxl.rawsize = __cpu_to_le64(rawsize); else nd_label->efi.rawsize = __cpu_to_le64(rawsize); } static inline u64 nsl_get_isetcookie(struct nvdimm_drvdata *ndd, struct 
nd_namespace_label *nd_label) { /* WARN future refactor attempts that break this assumption */ if (dev_WARN_ONCE(ndd->dev, ndd->cxl, "CXL labels do not use the isetcookie concept\n")) return 0; return __le64_to_cpu(nd_label->efi.isetcookie); } static inline void nsl_set_isetcookie(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u64 isetcookie) { if (!ndd->cxl) nd_label->efi.isetcookie = __cpu_to_le64(isetcookie); } static inline bool nsl_validate_isetcookie(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u64 cookie) { /* * Let the EFI and CXL validation comingle, where fields that * don't matter to CXL always validate. */ if (ndd->cxl) return true; return cookie == __le64_to_cpu(nd_label->efi.isetcookie); } static inline u16 nsl_get_position(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le16_to_cpu(nd_label->cxl.position); return __le16_to_cpu(nd_label->efi.position); } static inline void nsl_set_position(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u16 position) { if (ndd->cxl) nd_label->cxl.position = __cpu_to_le16(position); else nd_label->efi.position = __cpu_to_le16(position); } static inline u16 nsl_get_nlabel(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return 0; return __le16_to_cpu(nd_label->efi.nlabel); } static inline void nsl_set_nlabel(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u16 nlabel) { if (!ndd->cxl) nd_label->efi.nlabel = __cpu_to_le16(nlabel); } static inline u16 nsl_get_nrange(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return __le16_to_cpu(nd_label->cxl.nrange); return 1; } static inline void nsl_set_nrange(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u16 nrange) { if (ndd->cxl) nd_label->cxl.nrange = __cpu_to_le16(nrange); } static inline u64 nsl_get_lbasize(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { /* * Yes, for some reason the EFI labels convey a massive 64-bit * lbasize, that got fixed for CXL. 
*/ if (ndd->cxl) return __le16_to_cpu(nd_label->cxl.lbasize); return __le64_to_cpu(nd_label->efi.lbasize); } static inline void nsl_set_lbasize(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, u64 lbasize) { if (ndd->cxl) nd_label->cxl.lbasize = __cpu_to_le16(lbasize); else nd_label->efi.lbasize = __cpu_to_le64(lbasize); } static inline const uuid_t *nsl_get_uuid(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, uuid_t *uuid) { if (ndd->cxl) import_uuid(uuid, nd_label->cxl.uuid); else import_uuid(uuid, nd_label->efi.uuid); return uuid; } static inline const uuid_t *nsl_set_uuid(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, const uuid_t *uuid) { if (ndd->cxl) export_uuid(nd_label->cxl.uuid, uuid); else export_uuid(nd_label->efi.uuid, uuid); return uuid; } static inline bool nsl_uuid_equal(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, const uuid_t *uuid) { uuid_t tmp; if (ndd->cxl) import_uuid(&tmp, nd_label->cxl.uuid); else import_uuid(&tmp, nd_label->efi.uuid); return uuid_equal(&tmp, uuid); } static inline const u8 *nsl_uuid_raw(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return nd_label->cxl.uuid; return nd_label->efi.uuid; } bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label, guid_t *guid); enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label); struct nd_region_data { int ns_count; int ns_active; unsigned int hints_shift; void __iomem *flush_wpq[]; }; static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd, int dimm, int hint) { unsigned int num = 1 << ndrd->hints_shift; unsigned int mask = num - 1; return ndrd->flush_wpq[dimm * num + (hint & mask)]; } static inline void ndrd_set_flush_wpq(struct nd_region_data *ndrd, int dimm, int hint, void __iomem *flush) { unsigned int num = 1 << ndrd->hints_shift; unsigned int mask = num - 1; ndrd->flush_wpq[dimm * num + (hint & mask)] = flush; } static inline struct nd_namespace_index *to_namespace_index( struct nvdimm_drvdata *ndd, int i) { if (i < 0) return NULL; return ndd->data + sizeof_namespace_index(ndd) * i; } static inline struct nd_namespace_index *to_current_namespace_index( struct nvdimm_drvdata *ndd) { return to_namespace_index(ndd, ndd->ns_current); } static inline struct nd_namespace_index *to_next_namespace_index( struct nvdimm_drvdata *ndd) { return to_namespace_index(ndd, ndd->ns_next); } unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd); #define efi_namespace_label_has(ndd, field) \ (!ndd->cxl && offsetof(struct nvdimm_efi_label, field) \ < sizeof_namespace_label(ndd)) #define nd_dbg_dpa(r, d, res, fmt, arg...) \ dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ (unsigned long long) (res ? resource_size(res) : 0), \ (unsigned long long) (res ? res->start : 0), ##arg) #define for_each_dpa_resource(ndd, res) \ for (res = (ndd)->dpa.child; res; res = res->sibling) #define for_each_dpa_resource_safe(ndd, res, next) \ for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ res; res = next, next = next ? 
next->sibling : NULL) struct nd_percpu_lane { int count; spinlock_t lock; }; enum nd_label_flags { ND_LABEL_REAP, }; struct nd_label_ent { struct list_head list; unsigned long flags; struct nd_namespace_label *label; }; enum nd_mapping_lock_class { ND_MAPPING_CLASS0, ND_MAPPING_UUID_SCAN, }; struct nd_mapping { struct nvdimm *nvdimm; u64 start; u64 size; int position; struct list_head labels; struct mutex lock; /* * @ndd is for private use at region enable / disable time for * get_ndd() + put_ndd(), all other nd_mapping to ndd * conversions use to_ndd() which respects enabled state of the * nvdimm. */ struct nvdimm_drvdata *ndd; }; struct nd_region { struct device dev; struct ida ns_ida; struct ida btt_ida; struct ida pfn_ida; struct ida dax_ida; unsigned long flags; struct device *ns_seed; struct device *btt_seed; struct device *pfn_seed; struct device *dax_seed; unsigned long align; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; int id, num_lanes, ro, numa_node, target_node; void *provider_data; struct kernfs_node *bb_state; struct badblocks bb; struct nd_interleave_set *nd_set; struct nd_percpu_lane __percpu *lane; int (*flush)(struct nd_region *nd_region, struct bio *bio); struct nd_mapping mapping[] __counted_by(ndr_mappings); }; static inline bool nsl_validate_nlabel(struct nd_region *nd_region, struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { if (ndd->cxl) return true; return nsl_get_nlabel(ndd, nd_label) == nd_region->ndr_mappings; } /* * Lookup next in the repeating sequence of 01, 10, and 11. */ static inline unsigned nd_inc_seq(unsigned seq) { static const unsigned next[] = { 0, 2, 3, 1 }; return next[seq & 3]; } struct btt; struct nd_btt { struct device dev; struct nd_namespace_common *ndns; struct btt *btt; unsigned long lbasize; u64 size; uuid_t *uuid; int id; int initial_offset; u16 version_major; u16 version_minor; }; enum nd_pfn_mode { PFN_MODE_NONE, PFN_MODE_RAM, PFN_MODE_PMEM, }; struct nd_pfn { int id; uuid_t *uuid; struct device dev; unsigned long align; unsigned long npfns; enum nd_pfn_mode mode; struct nd_pfn_sb *pfn_sb; struct nd_namespace_common *ndns; }; struct nd_dax { struct nd_pfn nd_pfn; }; static inline u32 nd_info_block_reserve(void) { return ALIGN(SZ_8K, PAGE_SIZE); } enum nd_async_mode { ND_SYNC, ND_ASYNC, }; int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf, size_t len); ssize_t nd_size_select_show(unsigned long current_size, const unsigned long *supported, char *buf); ssize_t nd_size_select_store(struct device *dev, const char *buf, unsigned long *current_size, const unsigned long *supported); int __init nvdimm_init(void); int __init nd_region_init(void); int __init nd_label_init(void); void nvdimm_exit(void); void nd_region_exit(void); struct nvdimm; extern const struct attribute_group nd_device_attribute_group; extern const struct attribute_group nd_numa_attribute_group; extern const struct attribute_group *nvdimm_bus_attribute_groups[]; struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); int nvdimm_check_config_data(struct device *dev); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, 
void *buf, size_t offset, size_t len); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, unsigned int len); void nvdimm_set_labeling(struct device *dev); void nvdimm_set_locked(struct device *dev); void nvdimm_clear_locked(struct device *dev); int nvdimm_security_setup_events(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_KEYS) int nvdimm_security_unlock(struct device *dev); #else static inline int nvdimm_security_unlock(struct device *dev) { return 0; } #endif struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { char reserved[SZ_4K - 8]; __le64 checksum; }; u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else static inline int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) { return -ENODEV; } static inline bool is_nd_btt(struct device *dev) { return false; } static inline struct device *nd_btt_create(struct nd_region *nd_region) { return NULL; } #endif struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) #define MAX_NVDIMM_ALIGN 4 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns); int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig); extern const struct attribute_group *nd_pfn_attribute_groups[]; #else static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) { return -ENODEV; } static inline bool is_nd_pfn(struct device *dev) { return false; } static inline struct device *nd_pfn_create(struct nd_region *nd_region) { return NULL; } static inline int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { return -ENODEV; } #endif struct nd_dax *to_nd_dax(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_DAX) int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_dax(const struct device *dev); struct device *nd_dax_create(struct nd_region *nd_region); #else static inline int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) { return -ENODEV; } static inline bool is_nd_dax(const struct device *dev) { return false; } static inline struct device *nd_dax_create(struct nd_region *nd_region) { return NULL; } #endif int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); u64 nd_region_interleave_set_cookie(struct nd_region *nd_region, struct nd_namespace_index *nsindex); u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); void nvdimm_check_and_set_ro(struct gendisk *disk); void nvdimm_drvdata_release(struct kref *kref); void put_ndd(struct nvdimm_drvdata *ndd); int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id, resource_size_t start, resource_size_t n); resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns); bool nvdimm_namespace_locked(struct 
nd_namespace_common *ndns); struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev); int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt); const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); unsigned int pmem_sector_size(struct nd_namespace_common *ndns); struct range; void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct range *range); int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns, resource_size_t size); void devm_namespace_disable(struct device *dev, struct nd_namespace_common *ndns); #if IS_ENABLED(CONFIG_ND_CLAIM) /* max struct page size independent of kernel config */ #define MAX_STRUCT_PAGE_SIZE 64 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); #else static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) { return -ENXIO; } #endif int nd_region_activate(struct nd_region *nd_region); static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) { if (bb->count) { sector_t first_bad; int num_bad; return !!badblocks_check(bb, sector, len / 512, &first_bad, &num_bad); } return false; } const uuid_t *nd_dev_to_uuid(struct device *dev); bool pmem_should_map_pages(struct device *dev); #endif /* __ND_H__ */
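The nd_inc_seq() helper above hides the label-index sequence rotation in a small lookup table; this standalone sketch (illustrative only, not part of the driver) simply walks the 1 -> 2 -> 3 -> 1 cycle that the comment describes.

#include <stdio.h>

/* Mirror of the lookup table in nd_inc_seq(): 0 stays 0 (invalid),
 * otherwise the sequence rotates 01b -> 10b -> 11b -> 01b. */
static unsigned inc_seq(unsigned seq)
{
	static const unsigned next[] = { 0, 2, 3, 1 };

	return next[seq & 3];
}

int main(void)
{
	unsigned seq = 1;

	for (int i = 0; i < 4; i++) {
		printf("%u -> %u\n", seq, inc_seq(seq));
		seq = inc_seq(seq);
	}
	return 0;
}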
// SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "btree_cache.h" #include "btree_iter.h" #include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update.h" #include "errcode.h" #include "error.h" #include "journal.h" #include "journal_reclaim.h" #include "trace.h" #include <linux/sched/mm.h> static inline bool btree_uses_pcpu_readers(enum btree_id id) { return id == BTREE_ID_subvolumes; } static struct kmem_cache *bch2_key_cache; static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { const struct bkey_cached *ck = obj; const struct bkey_cached_key *key = arg->key; return ck->key.btree_id != key->btree_id || !bpos_eq(ck->key.pos, key->pos); } static const struct rhashtable_params bch2_btree_key_cache_params = { .head_offset = offsetof(struct bkey_cached, hash), .key_offset = offsetof(struct bkey_cached, key), .key_len = sizeof(struct bkey_cached_key), .obj_cmpfn = bch2_btree_key_cache_cmp_fn, }; __flatten inline struct bkey_cached * bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos) { struct bkey_cached_key key = { .btree_id = btree_id, .pos = pos, }; return rhashtable_lookup_fast(&c->btree_key_cache.table, &key, bch2_btree_key_cache_params); } static bool bkey_cached_lock_for_evict(struct bkey_cached *ck) { if (!six_trylock_intent(&ck->c.lock)) return false; if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { six_unlock_intent(&ck->c.lock); return false; } if (!six_trylock_write(&ck->c.lock)) { six_unlock_intent(&ck->c.lock); return false; } return true; } static void bkey_cached_evict(struct btree_key_cache *c, struct bkey_cached *ck) { BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash, bch2_btree_key_cache_params)); memset(&ck->key, ~0, sizeof(ck->key)); atomic_long_dec(&c->nr_keys); } static void bkey_cached_free(struct btree_key_cache *bc, struct bkey_cached *ck) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); ck->btree_trans_barrier_seq = start_poll_synchronize_srcu(&c->btree_trans_barrier); if (ck->c.lock.readers) { list_move_tail(&ck->list, &bc->freed_pcpu); bc->nr_freed_pcpu++; } else { list_move_tail(&ck->list, &bc->freed_nonpcpu); bc->nr_freed_nonpcpu++; } atomic_long_inc(&bc->nr_freed); kfree(ck->k); ck->k = NULL; ck->u64s = 0; six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); } #ifdef __KERNEL__ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc, struct bkey_cached *ck) { struct bkey_cached *pos;
bc->nr_freed_nonpcpu++; list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) { if (ULONG_CMP_GE(ck->btree_trans_barrier_seq, pos->btree_trans_barrier_seq)) { list_move(&ck->list, &pos->list); return; } } list_move(&ck->list, &bc->freed_nonpcpu); } #endif static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, struct bkey_cached *ck) { BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); if (!ck->c.lock.readers) { #ifdef __KERNEL__ struct btree_key_cache_freelist *f; bool freed = false; preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); if (f->nr < ARRAY_SIZE(f->objs)) { f->objs[f->nr++] = ck; freed = true; } preempt_enable(); if (!freed) { mutex_lock(&bc->lock); preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); while (f->nr > ARRAY_SIZE(f->objs) / 2) { struct bkey_cached *ck2 = f->objs[--f->nr]; __bkey_cached_move_to_freelist_ordered(bc, ck2); } preempt_enable(); __bkey_cached_move_to_freelist_ordered(bc, ck); mutex_unlock(&bc->lock); } #else mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_nonpcpu); bc->nr_freed_nonpcpu++; mutex_unlock(&bc->lock); #endif } else { mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_pcpu); bc->nr_freed_pcpu++; mutex_unlock(&bc->lock); } } static void bkey_cached_free_fast(struct btree_key_cache *bc, struct bkey_cached *ck) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); ck->btree_trans_barrier_seq = start_poll_synchronize_srcu(&c->btree_trans_barrier); list_del_init(&ck->list); atomic_long_inc(&bc->nr_freed); kfree(ck->k); ck->k = NULL; ck->u64s = 0; bkey_cached_move_to_freelist(bc, ck); six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); } static struct bkey_cached * bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, bool *was_new) { struct bch_fs *c = trans->c; struct btree_key_cache *bc = &c->btree_key_cache; struct bkey_cached *ck = NULL; bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id); int ret; if (!pcpu_readers) { #ifdef __KERNEL__ struct btree_key_cache_freelist *f; preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); if (f->nr) ck = f->objs[--f->nr]; preempt_enable(); if (!ck) { mutex_lock(&bc->lock); preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); while (!list_empty(&bc->freed_nonpcpu) && f->nr < ARRAY_SIZE(f->objs) / 2) { ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); list_del_init(&ck->list); bc->nr_freed_nonpcpu--; f->objs[f->nr++] = ck; } ck = f->nr ? 
f->objs[--f->nr] : NULL; preempt_enable(); mutex_unlock(&bc->lock); } #else mutex_lock(&bc->lock); if (!list_empty(&bc->freed_nonpcpu)) { ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); list_del_init(&ck->list); bc->nr_freed_nonpcpu--; } mutex_unlock(&bc->lock); #endif } else { mutex_lock(&bc->lock); if (!list_empty(&bc->freed_pcpu)) { ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list); list_del_init(&ck->list); bc->nr_freed_pcpu--; } mutex_unlock(&bc->lock); } if (ck) { ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_); if (unlikely(ret)) { bkey_cached_move_to_freelist(bc, ck); return ERR_PTR(ret); } path->l[0].b = (void *) ck; path->l[0].lock_seq = six_lock_seq(&ck->c.lock); mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); ret = bch2_btree_node_lock_write(trans, path, &ck->c); if (unlikely(ret)) { btree_node_unlock(trans, path, 0); bkey_cached_move_to_freelist(bc, ck); return ERR_PTR(ret); } return ck; } ck = allocate_dropping_locks(trans, ret, kmem_cache_zalloc(bch2_key_cache, _gfp)); if (ret) { kmem_cache_free(bch2_key_cache, ck); return ERR_PTR(ret); } if (!ck) return NULL; INIT_LIST_HEAD(&ck->list); bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0); ck->c.cached = true; BUG_ON(!six_trylock_intent(&ck->c.lock)); BUG_ON(!six_trylock_write(&ck->c.lock)); *was_new = true; return ck; } static struct bkey_cached * bkey_cached_reuse(struct btree_key_cache *c) { struct bucket_table *tbl; struct rhash_head *pos; struct bkey_cached *ck; unsigned i; mutex_lock(&c->lock); rcu_read_lock(); tbl = rht_dereference_rcu(c->table.tbl, &c->table); for (i = 0; i < tbl->size; i++) rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(c, ck); goto out; } } ck = NULL; out: rcu_read_unlock(); mutex_unlock(&c->lock); return ck; } static struct bkey_cached * btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; struct btree_key_cache *bc = &c->btree_key_cache; struct bkey_cached *ck; bool was_new = false; ck = bkey_cached_alloc(trans, path, &was_new); if (IS_ERR(ck)) return ck; if (unlikely(!ck)) { ck = bkey_cached_reuse(bc); if (unlikely(!ck)) { bch_err(c, "error allocating memory for key cache item, btree %s", bch2_btree_id_str(path->btree_id)); return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create); } mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); } ck->c.level = 0; ck->c.btree_id = path->btree_id; ck->key.btree_id = path->btree_id; ck->key.pos = path->pos; ck->valid = false; ck->flags = 1U << BKEY_CACHED_ACCESSED; if (unlikely(rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params))) { /* We raced with another fill: */ if (likely(was_new)) { six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); kfree(ck); } else { bkey_cached_free_fast(bc, ck); } mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); return NULL; } atomic_long_inc(&bc->nr_keys); six_unlock_write(&ck->c.lock); return ck; } static int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, struct bkey_cached *ck) { struct btree_iter iter; struct bkey_s_c k; unsigned new_u64s = 0; struct bkey_i *new_k = NULL; int ret; bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos, BTREE_ITER_KEY_CACHE_FILL| BTREE_ITER_CACHED_NOFILL); iter.flags &= ~BTREE_ITER_WITH_JOURNAL; k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); 
if (ret) goto err; if (!bch2_btree_node_relock(trans, ck_path, 0)) { trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill); goto err; } /* * bch2_varint_decode can read past the end of the buffer by at * most 7 bytes (it won't be used): */ new_u64s = k.k->u64s + 1; /* * Allocate some extra space so that the transaction commit path is less * likely to have to reallocate, since that requires a transaction * restart: */ new_u64s = min(256U, (new_u64s * 3) / 2); if (new_u64s > ck->u64s) { new_u64s = roundup_pow_of_two(new_u64s); new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN); if (!new_k) { bch2_trans_unlock(trans); new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(ck->key.btree_id), new_u64s); ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; goto err; } if (!bch2_btree_node_relock(trans, ck_path, 0)) { kfree(new_k); trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill); goto err; } ret = bch2_trans_relock(trans); if (ret) { kfree(new_k); goto err; } } } ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c); if (ret) { kfree(new_k); goto err; } if (new_k) { kfree(ck->k); ck->u64s = new_u64s; ck->k = new_k; } bkey_reassemble(ck->k, k); ck->valid = true; bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); /* We're not likely to need this iterator again: */ set_btree_iter_dontneed(&iter); err: bch2_trans_iter_exit(trans, &iter); return ret; } static noinline int bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; struct bkey_cached *ck; int ret = 0; BUG_ON(path->level); path->l[1].b = NULL; if (bch2_btree_node_relock_notrace(trans, path, 0)) { ck = (void *) path->l[0].b; goto fill; } retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { ck = btree_key_cache_create(trans, path); ret = PTR_ERR_OR_ZERO(ck); if (ret) goto err; if (!ck) goto retry; mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); path->locks_want = 1; } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; BUG_ON(ret); if (ck->key.btree_id != path->btree_id || !bpos_eq(ck->key.pos, path->pos)) { six_unlock_type(&ck->c.lock, lock_want); goto retry; } mark_btree_node_locked(trans, path, 0, (enum btree_node_locked_type) lock_want); } path->l[0].lock_seq = six_lock_seq(&ck->c.lock); path->l[0].b = (void *) ck; fill: path->uptodate = BTREE_ITER_UPTODATE; if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { /* * Using the underscore version because we haven't set * path->uptodate yet: */ if (!path->locks_want && !__bch2_btree_path_upgrade(trans, path, 1, NULL)) { trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); goto err; } ret = btree_key_cache_fill(trans, path, ck); if (ret) goto err; ret = bch2_btree_path_relock(trans, path, _THIS_IP_); if (ret) goto err; path->uptodate = BTREE_ITER_UPTODATE; } if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) set_bit(BKEY_CACHED_ACCESSED, 
&ck->flags); BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); BUG_ON(path->uptodate); return ret; err: path->uptodate = BTREE_ITER_NEED_TRAVERSE; if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(ret); } return ret; } int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; struct bkey_cached *ck; int ret = 0; EBUG_ON(path->level); path->l[1].b = NULL; if (bch2_btree_node_relock_notrace(trans, path, 0)) { ck = (void *) path->l[0].b; goto fill; } retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { return bch2_btree_path_traverse_cached_slowpath(trans, path, flags); } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_); EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)); if (ret) return ret; if (ck->key.btree_id != path->btree_id || !bpos_eq(ck->key.pos, path->pos)) { six_unlock_type(&ck->c.lock, lock_want); goto retry; } mark_btree_node_locked(trans, path, 0, (enum btree_node_locked_type) lock_want); } path->l[0].lock_seq = six_lock_seq(&ck->c.lock); path->l[0].b = (void *) ck; fill: if (!ck->valid) return bch2_btree_path_traverse_cached_slowpath(trans, path, flags); if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) set_bit(BKEY_CACHED_ACCESSED, &ck->flags); path->uptodate = BTREE_ITER_UPTODATE; EBUG_ON(!ck->valid); EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); return ret; } static int btree_key_cache_flush_pos(struct btree_trans *trans, struct bkey_cached_key key, u64 journal_seq, unsigned commit_flags, bool evict) { struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree_iter c_iter, b_iter; struct bkey_cached *ck = NULL; int ret; bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, BTREE_ITER_SLOTS| BTREE_ITER_INTENT| BTREE_ITER_ALL_SNAPSHOTS); bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, BTREE_ITER_CACHED| BTREE_ITER_INTENT); b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE; ret = bch2_btree_iter_traverse(&c_iter); if (ret) goto out; ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b; if (!ck) goto out; if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (evict) goto evict; goto out; } BUG_ON(!ck->valid); if (journal_seq && ck->journal.seq != journal_seq) goto out; trans->journal_res.seq = ck->journal.seq; /* * If we're at the end of the journal, we really want to free up space * in the journal right away - we don't want to pin that old journal * sequence number with a new btree node write, we want to re-journal * the update */ if (ck->journal.seq == journal_last_seq(j)) commit_flags |= BCH_WATERMARK_reclaim; if (ck->journal.seq != journal_last_seq(j) || !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; ret = bch2_btree_iter_traverse(&b_iter) ?: bch2_trans_update(trans, &b_iter, ck->k, BTREE_UPDATE_KEY_CACHE_RECLAIM| BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc| commit_flags); bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && !bch2_journal_error(j), c, "flushing key cache: %s", bch2_err_str(ret)); if (ret) goto out; bch2_journal_pin_drop(j, &ck->journal); struct btree_path *path = 
btree_iter_path(trans, &c_iter); BUG_ON(!btree_node_locked(path, 0)); if (!evict) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); } } else { struct btree_path *path2; unsigned i; evict: trans_for_each_path(trans, path2, i) if (path2 != path) __bch2_btree_path_unlock(trans, path2); bch2_btree_node_lock_write_nofail(trans, path, &ck->c); if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); } mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); bkey_cached_evict(&c->btree_key_cache, ck); bkey_cached_free_fast(&c->btree_key_cache, ck); } out: bch2_trans_iter_exit(trans, &b_iter); bch2_trans_iter_exit(trans, &c_iter); return ret; } int bch2_btree_key_cache_journal_flush(struct journal *j, struct journal_entry_pin *pin, u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; struct btree_trans *trans = bch2_trans_get(c); int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int ret = 0; btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); key = ck->key; if (ck->journal.seq != seq || !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { six_unlock_read(&ck->c.lock); goto unlock; } if (ck->seq != seq) { bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal, bch2_btree_key_cache_journal_flush); six_unlock_read(&ck->c.lock); goto unlock; } six_unlock_read(&ck->c.lock); ret = lockrestart_do(trans, btree_key_cache_flush_pos(trans, key, seq, BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); bch2_trans_put(trans); return ret; } bool bch2_btree_insert_key_cached(struct btree_trans *trans, unsigned flags, struct btree_insert_entry *insert_entry) { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) (trans->paths + insert_entry->path)->l[0].b; struct bkey_i *insert = insert_entry->k; bool kick_reclaim = false; BUG_ON(insert->k.u64s > ck->u64s); bkey_copy(ck->k, insert); ck->valid = true; if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); set_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_inc(&c->btree_key_cache.nr_dirty); if (bch2_nr_btree_keys_need_flush(c)) kick_reclaim = true; } /* * To minimize lock contention, we only add the journal pin here and * defer pin updates to the flush callback via ->seq. Be careful not to * update ->seq on nojournal commits because we don't want to update the * pin to a seq that doesn't include journal updates on disk. Otherwise * we risk losing the update after a crash. * * The only exception is if the pin is not active in the first place. We * have to add the pin because journal reclaim drives key cache * flushing. The flush callback will not proceed unless ->seq matches * the latest pin, so make sure it starts with a consistent value. 
*/ if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) || !journal_pin_active(&ck->journal)) { ck->seq = trans->journal_res.seq; } bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &ck->journal, bch2_btree_key_cache_journal_flush); if (kick_reclaim) journal_reclaim_kick(&c->journal); return true; } void bch2_btree_key_cache_drop(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; BUG_ON(!ck->valid); /* * We just did an update to the btree, bypassing the key cache: the key * cache key is now stale and must be dropped, even if dirty: */ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); bch2_journal_pin_drop(&c->journal, &ck->journal); } ck->valid = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct bch_fs *c = shrink->private_data; struct btree_key_cache *bc = &c->btree_key_cache; struct bucket_table *tbl; struct bkey_cached *ck, *t; size_t scanned = 0, freed = 0, nr = sc->nr_to_scan; unsigned start, flags; int srcu_idx; mutex_lock(&bc->lock); srcu_idx = srcu_read_lock(&c->btree_trans_barrier); flags = memalloc_nofs_save(); /* * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) break; list_del(&ck->list); six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); freed++; bc->nr_freed_nonpcpu--; } list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) break; list_del(&ck->list); six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); freed++; bc->nr_freed_pcpu--; } rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; start = bc->shrink_iter; do { struct rhash_head *pos, *next; pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); while (!rht_is_a_nulls(pos)) { next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); ck = container_of(pos, struct bkey_cached, hash); if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { goto next; } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); goto next; } else if (bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); } scanned++; if (scanned >= nr) break; next: pos = next; } bc->shrink_iter++; if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; } while (scanned < nr && bc->shrink_iter != start); rcu_read_unlock(); memalloc_nofs_restore(flags); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); mutex_unlock(&bc->lock); return freed; } static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct bch_fs *c = shrink->private_data; struct btree_key_cache *bc = &c->btree_key_cache; long nr = atomic_long_read(&bc->nr_keys) - atomic_long_read(&bc->nr_dirty); return max(0L, nr); } void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct bucket_table *tbl; struct bkey_cached *ck, *n; struct rhash_head *pos; LIST_HEAD(items); unsigned i; #ifdef __KERNEL__ int cpu; 
#endif shrinker_free(bc->shrink); mutex_lock(&bc->lock); /* * The loop is needed to guard against racing with rehash: */ while (atomic_long_read(&bc->nr_keys)) { rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (tbl) for (i = 0; i < tbl->size; i++) rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { bkey_cached_evict(bc, ck); list_add(&ck->list, &items); } rcu_read_unlock(); } #ifdef __KERNEL__ if (bc->pcpu_freed) { for_each_possible_cpu(cpu) { struct btree_key_cache_freelist *f = per_cpu_ptr(bc->pcpu_freed, cpu); for (i = 0; i < f->nr; i++) { ck = f->objs[i]; list_add(&ck->list, &items); } } } #endif BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu); BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu); list_splice(&bc->freed_pcpu, &items); list_splice(&bc->freed_nonpcpu, &items); mutex_unlock(&bc->lock); list_for_each_entry_safe(ck, n, &items, list) { cond_resched(); list_del(&ck->list); kfree(ck->k); six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); } if (atomic_long_read(&bc->nr_dirty) && !bch2_journal_error(&c->journal) && test_bit(BCH_FS_was_rw, &c->flags)) panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n", atomic_long_read(&bc->nr_dirty)); if (atomic_long_read(&bc->nr_keys)) panic("btree key cache shutdown error: nr_keys nonzero (%li)\n", atomic_long_read(&bc->nr_keys)); if (bc->table_init_done) rhashtable_destroy(&bc->table); free_percpu(bc->pcpu_freed); } void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) { mutex_init(&c->lock); INIT_LIST_HEAD(&c->freed_pcpu); INIT_LIST_HEAD(&c->freed_nonpcpu); } int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct shrinker *shrink; #ifdef __KERNEL__ bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); if (!bc->pcpu_freed) return -BCH_ERR_ENOMEM_fs_btree_cache_init; #endif if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params)) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->table_init_done = true; shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name); if (!shrink) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->shrink = shrink; shrink->seeks = 0; shrink->count_objects = bch2_btree_key_cache_count; shrink->scan_objects = bch2_btree_key_cache_scan; shrink->private_data = c; shrinker_register(shrink); return 0; } void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) { prt_printf(out, "nr_freed:\t%lu", atomic_long_read(&c->nr_freed)); prt_newline(out); prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys)); prt_newline(out); prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty)); prt_newline(out); } void bch2_btree_key_cache_exit(void) { kmem_cache_destroy(bch2_key_cache); } int __init bch2_btree_key_cache_init(void) { bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT); if (!bch2_key_cache) return -ENOMEM; return 0; }
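The key cache above keeps its entries in a resizable kernel rhashtable keyed on the embedded (btree_id, pos) pair: a custom obj_cmpfn instead of a plain memcmp, rhashtable_lookup_fast() for lockless lookups, rhashtable_lookup_insert_fast() to resolve races between concurrent creators, and rhashtable_remove_fast() on eviction. Below is a minimal, self-contained sketch of that same pattern; the demo_* names are invented for illustration and are not part of bcachefs, and a real user (as the key cache does with its six locks and SRCU barrier) still needs its own object-lifetime rules on top of the lockless lookup.

/*
 * Hypothetical sketch of the rhashtable pattern used by the key cache.
 * None of the demo_* identifiers exist in the kernel tree.
 */
#include <linux/rhashtable.h>
#include <linux/slab.h>

struct demo_key {
	u64	id;	/* both fields u64 so the key has no padding holes */
	u64	pos;
};

struct demo_entry {
	struct rhash_head	hash;	/* table linkage, via .head_offset */
	struct rcu_head		rcu;	/* for deferred freeing after removal */
	struct demo_key		key;	/* hashed/compared via .key_offset/.key_len */
};

static int demo_cmp(struct rhashtable_compare_arg *arg, const void *obj)
{
	const struct demo_entry *e = obj;
	const struct demo_key *key = arg->key;

	/* memcmp-like convention: 0 on match, nonzero on mismatch */
	return e->key.id != key->id || e->key.pos != key->pos;
}

static const struct rhashtable_params demo_params = {
	.head_offset	= offsetof(struct demo_entry, hash),
	.key_offset	= offsetof(struct demo_entry, key),
	.key_len	= sizeof(struct demo_key),
	.obj_cmpfn	= demo_cmp,
};

/* assumes rhashtable_init(tbl, &demo_params) was called at setup time */
static int demo_use(struct rhashtable *tbl)
{
	struct demo_key key = { .id = 1, .pos = 42 };
	struct demo_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);
	int ret;

	if (!e)
		return -ENOMEM;
	e->key = key;

	/* fails with -EEXIST if another entry with the same key won the race */
	ret = rhashtable_lookup_insert_fast(tbl, &e->hash, demo_params);
	if (ret) {
		kfree(e);
		return ret;
	}

	/* lookups take the key, not the object; RCU read protection is internal */
	e = rhashtable_lookup_fast(tbl, &key, demo_params);
	if (e) {
		ret = rhashtable_remove_fast(tbl, &e->hash, demo_params);
		if (!ret)
			kfree_rcu(e, rcu);	/* defer the free past concurrent RCU readers */
	}

	return ret;
}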
// SPDX-License-Identifier: GPL-2.0 /* * SME code for cfg80211 * both driver SME event handling and the SME implementation * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/wireless.h> #include <linux/export.h> #include <net/iw_handler.h> #include <net/cfg80211.h> #include <net/rtnetlink.h> #include "nl80211.h" #include "reg.h" #include "rdev-ops.h" /* * Software SME in cfg80211, using auth/assoc/deauth calls to the * driver. This is for implementing nl80211's connect/disconnect * and wireless extensions (if configured.) */ struct cfg80211_conn { struct cfg80211_connect_params params; /* these are sub-states of the _CONNECTING sme_state */ enum { CFG80211_CONN_SCANNING, CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, CFG80211_CONN_AUTH_FAILED_TIMEOUT, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_ASSOC_FAILED_TIMEOUT, CFG80211_CONN_DEAUTH, CFG80211_CONN_ABANDON, CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; const u8 *ie; size_t ie_len; bool auto_auth, prev_bssid_valid; }; static void cfg80211_sme_free(struct wireless_dev *wdev) { if (!wdev->conn) return; kfree(wdev->conn->ie); kfree(wdev->conn); wdev->conn = NULL; } static int cfg80211_conn_scan(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_scan_request *request; int n_channels, err; lockdep_assert_wiphy(wdev->wiphy); if (rdev->scan_req || rdev->scan_msg) return -EBUSY; if (wdev->conn->params.channel) n_channels = 1; else n_channels = ieee80211_get_num_supported_channels(wdev->wiphy); request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) + sizeof(request->channels[0]) * n_channels, GFP_KERNEL); if (!request) return -ENOMEM; if (wdev->conn->params.channel) { enum nl80211_band band = wdev->conn->params.channel->band; struct ieee80211_supported_band *sband = wdev->wiphy->bands[band]; if (!sband) { kfree(request); return -EINVAL; } request->channels[0] = wdev->conn->params.channel; request->rates[band] = (1 << sband->n_bitrates) - 1; } else { int i = 0, j; enum nl80211_band band; struct ieee80211_supported_band *bands; struct ieee80211_channel *channel; for (band = 0; band < NUM_NL80211_BANDS; band++) { bands = wdev->wiphy->bands[band]; if (!bands) continue; for (j = 0; j < bands->n_channels; j++) { channel = &bands->channels[j]; if (channel->flags & IEEE80211_CHAN_DISABLED) continue; request->channels[i++] = channel; } request->rates[band] = (1 << bands->n_bitrates) - 1; } n_channels = i; } request->n_channels = n_channels; request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = 1; memcpy(request->ssids[0].ssid, wdev->conn->params.ssid, wdev->conn->params.ssid_len); request->ssids[0].ssid_len = wdev->conn->params.ssid_len; eth_broadcast_addr(request->bssid); request->wdev = wdev; request->wiphy = &rdev->wiphy; request->scan_start = jiffies; rdev->scan_req = request; err = rdev_scan(rdev, request); if (!err) { wdev->conn->state = CFG80211_CONN_SCANNING; nl80211_send_scan_start(rdev, wdev);
dev_hold(wdev->netdev); } else { rdev->scan_req = NULL; kfree(request); } return err; } static int cfg80211_conn_do_work(struct wireless_dev *wdev, enum nl80211_timeout_reason *treason) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_connect_params *params; struct cfg80211_auth_request auth_req = {}; struct cfg80211_assoc_request req = {}; int err; lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn) return 0; params = &wdev->conn->params; switch (wdev->conn->state) { case CFG80211_CONN_SCANNING: /* didn't find it during scan ... */ return -ENOENT; case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: if (WARN_ON(!rdev->ops->auth)) return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_AUTHENTICATING; auth_req.key = params->key; auth_req.key_len = params->key_len; auth_req.key_idx = params->key_idx; auth_req.auth_type = params->auth_type; auth_req.bss = cfg80211_get_bss(&rdev->wiphy, params->channel, params->bssid, params->ssid, params->ssid_len, IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY); auth_req.link_id = -1; err = cfg80211_mlme_auth(rdev, wdev->netdev, &auth_req); cfg80211_put_bss(&rdev->wiphy, auth_req.bss); return err; case CFG80211_CONN_AUTH_FAILED_TIMEOUT: *treason = NL80211_TIMEOUT_AUTH; return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: if (WARN_ON(!rdev->ops->assoc)) return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_ASSOCIATING; if (wdev->conn->prev_bssid_valid) req.prev_bssid = wdev->conn->prev_bssid; req.ie = params->ie; req.ie_len = params->ie_len; req.use_mfp = params->mfp != NL80211_MFP_NO; req.crypto = params->crypto; req.flags = params->flags; req.ht_capa = params->ht_capa; req.ht_capa_mask = params->ht_capa_mask; req.vht_capa = params->vht_capa; req.vht_capa_mask = params->vht_capa_mask; req.link_id = -1; req.bss = cfg80211_get_bss(&rdev->wiphy, params->channel, params->bssid, params->ssid, params->ssid_len, IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY); if (!req.bss) { err = -ENOENT; } else { err = cfg80211_mlme_assoc(rdev, wdev->netdev, &req, NULL); cfg80211_put_bss(&rdev->wiphy, req.bss); } if (err) cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); return err; case CFG80211_CONN_ASSOC_FAILED_TIMEOUT: *treason = NL80211_TIMEOUT_ASSOC; fallthrough; case CFG80211_CONN_ASSOC_FAILED: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); return -ENOTCONN; case CFG80211_CONN_DEAUTH: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); fallthrough; case CFG80211_CONN_ABANDON: /* free directly, disconnected event already sent */ cfg80211_sme_free(wdev); return 0; default: return 0; } } void cfg80211_conn_work(struct work_struct *work) { struct cfg80211_registered_device *rdev = container_of(work, struct cfg80211_registered_device, conn_work); struct wireless_dev *wdev; u8 bssid_buf[ETH_ALEN], *bssid = NULL; enum nl80211_timeout_reason treason; wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; if (!netif_running(wdev->netdev)) continue; if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) continue; if (wdev->conn->params.bssid) { memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; } treason = NL80211_TIMEOUT_UNSPECIFIED; if (cfg80211_conn_do_work(wdev, &treason)) { struct cfg80211_connect_resp_params cr; memset(&cr, 0, sizeof(cr)); cr.status = 
-1; cr.links[0].bssid = bssid; cr.timeout_reason = treason; __cfg80211_connect_result(wdev->netdev, &cr, false); } } wiphy_unlock(&rdev->wiphy); } static void cfg80211_step_auth_next(struct cfg80211_conn *conn, struct cfg80211_bss *bss) { memcpy(conn->bssid, bss->bssid, ETH_ALEN); conn->params.bssid = conn->bssid; conn->params.channel = bss->channel; conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; } /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; lockdep_assert_wiphy(wdev->wiphy); bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel, wdev->conn->params.bssid, wdev->conn->params.ssid, wdev->conn->params.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY(wdev->conn->params.privacy)); if (!bss) return NULL; cfg80211_step_auth_next(wdev->conn, bss); schedule_work(&rdev->conn_work); return bss; } void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn) return; if (wdev->conn->state != CFG80211_CONN_SCANNING && wdev->conn->state != CFG80211_CONN_SCAN_AGAIN) return; bss = cfg80211_get_conn_bss(wdev); if (bss) cfg80211_put_bss(&rdev->wiphy, bss); else schedule_work(&rdev->conn_work); } void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) return; if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG && wdev->conn->auto_auth && wdev->conn->params.auth_type != NL80211_AUTHTYPE_NETWORK_EAP) { /* select automatically between only open, shared, leap */ switch (wdev->conn->params.auth_type) { case NL80211_AUTHTYPE_OPEN_SYSTEM: if (wdev->connect_keys) wdev->conn->params.auth_type = NL80211_AUTHTYPE_SHARED_KEY; else wdev->conn->params.auth_type = NL80211_AUTHTYPE_NETWORK_EAP; break; case NL80211_AUTHTYPE_SHARED_KEY: wdev->conn->params.auth_type = NL80211_AUTHTYPE_NETWORK_EAP; break; default: /* huh? */ wdev->conn->params.auth_type = NL80211_AUTHTYPE_OPEN_SYSTEM; break; } wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; schedule_work(&rdev->conn_work); } else if (status_code != WLAN_STATUS_SUCCESS) { struct cfg80211_connect_resp_params cr; memset(&cr, 0, sizeof(cr)); cr.status = status_code; cr.links[0].bssid = mgmt->bssid; cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED; __cfg80211_connect_result(wdev->netdev, &cr, false); } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); } } bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return false; if (status == WLAN_STATUS_SUCCESS) { wdev->conn->state = CFG80211_CONN_CONNECTED; return false; } if (wdev->conn->prev_bssid_valid) { /* * Some stupid APs don't accept reassoc, so we * need to fall back to trying regular assoc; * return true so no event is sent to userspace. 
*/ wdev->conn->prev_bssid_valid = false; wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); return true; } wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; schedule_work(&rdev->conn_work); return false; } void cfg80211_sme_deauth(struct wireless_dev *wdev) { cfg80211_sme_free(wdev); } void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; wdev->conn->state = CFG80211_CONN_AUTH_FAILED_TIMEOUT; schedule_work(&rdev->conn_work); } void cfg80211_sme_disassoc(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; wdev->conn->state = CFG80211_CONN_DEAUTH; schedule_work(&rdev->conn_work); } void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; wdev->conn->state = CFG80211_CONN_ASSOC_FAILED_TIMEOUT; schedule_work(&rdev->conn_work); } void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; wdev->conn->state = CFG80211_CONN_ABANDON; schedule_work(&rdev->conn_work); } static void cfg80211_wdev_release_bsses(struct wireless_dev *wdev) { unsigned int link; for_each_valid_link(wdev, link) { if (!wdev->links[link].client.current_bss) continue; cfg80211_unhold_bss(wdev->links[link].client.current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->links[link].client.current_bss->pub); wdev->links[link].client.current_bss = NULL; } } void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask) { unsigned int link; for_each_valid_link(wdev, link) { if (!wdev->links[link].client.current_bss || !(link_mask & BIT(link))) continue; cfg80211_unhold_bss(wdev->links[link].client.current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->links[link].client.current_bss->pub); wdev->links[link].client.current_bss = NULL; } } static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *buf; size_t offs; if (!rdev->wiphy.extended_capabilities_len || (ies && cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY, ies, ies_len))) { *out_ies = kmemdup(ies, ies_len, GFP_KERNEL); if (!*out_ies) return -ENOMEM; *out_ies_len = ies_len; return 0; } buf = kmalloc(ies_len + rdev->wiphy.extended_capabilities_len + 2, GFP_KERNEL); if (!buf) return -ENOMEM; if (ies_len) { static const u8 before_extcapa[] = { /* not listing IEs expected to be created by driver */ WLAN_EID_RSN, WLAN_EID_QOS_CAPA, WLAN_EID_RRM_ENABLED_CAPABILITIES, WLAN_EID_MOBILITY_DOMAIN, WLAN_EID_SUPPORTED_REGULATORY_CLASSES, WLAN_EID_BSS_COEX_2040, }; offs = ieee80211_ie_split(ies, ies_len, before_extcapa, ARRAY_SIZE(before_extcapa), 0); memcpy(buf, ies, offs); /* leave a whole for extended capabilities IE */ memcpy(buf + offs + rdev->wiphy.extended_capabilities_len + 2, ies + offs, ies_len - offs); } else { offs = 0; } /* place extended capabilities IE (with only driver capabilities) */ buf[offs] = WLAN_EID_EXT_CAPABILITY; buf[offs + 1] = rdev->wiphy.extended_capabilities_len; memcpy(buf + offs + 2, rdev->wiphy.extended_capabilities, rdev->wiphy.extended_capabilities_len); *out_ies = buf; *out_ies_len = ies_len + rdev->wiphy.extended_capabilities_len + 2; return 0; } static int cfg80211_sme_connect(struct wireless_dev *wdev, struct 
cfg80211_connect_params *connect, const u8 *prev_bssid) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; int err; if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; cfg80211_wdev_release_bsses(wdev); if (wdev->connected) { cfg80211_sme_free(wdev); wdev->connected = false; } if (wdev->conn) return -EINPROGRESS; wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); if (!wdev->conn) return -ENOMEM; /* * Copy all parameters, and treat explicitly IEs, BSSID, SSID. */ memcpy(&wdev->conn->params, connect, sizeof(*connect)); if (connect->bssid) { wdev->conn->params.bssid = wdev->conn->bssid; memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); } if (cfg80211_sme_get_conn_ies(wdev, connect->ie, connect->ie_len, &wdev->conn->ie, &wdev->conn->params.ie_len)) { kfree(wdev->conn); wdev->conn = NULL; return -ENOMEM; } wdev->conn->params.ie = wdev->conn->ie; if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { wdev->conn->auto_auth = true; /* start with open system ... should mostly work */ wdev->conn->params.auth_type = NL80211_AUTHTYPE_OPEN_SYSTEM; } else { wdev->conn->auto_auth = false; } wdev->conn->params.ssid = wdev->u.client.ssid; wdev->conn->params.ssid_len = wdev->u.client.ssid_len; /* see if we have the bss already */ bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel, wdev->conn->params.bssid, wdev->conn->params.ssid, wdev->conn->params.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY(wdev->conn->params.privacy)); if (prev_bssid) { memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); wdev->conn->prev_bssid_valid = true; } /* we're good if we have a matching bss struct */ if (bss) { enum nl80211_timeout_reason treason; cfg80211_step_auth_next(wdev->conn, bss); err = cfg80211_conn_do_work(wdev, &treason); cfg80211_put_bss(wdev->wiphy, bss); } else { /* otherwise we'll need to scan for the AP first */ err = cfg80211_conn_scan(wdev); /* * If we can't scan right now, then we need to scan again * after the current scan finished, since the parameters * changed (unless we find a good AP anyway). */ if (err == -EBUSY) { err = 0; wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; } } if (err) cfg80211_sme_free(wdev); return err; } static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err; if (!wdev->conn) return 0; if (!rdev->ops->deauth) return -EOPNOTSUPP; if (wdev->conn->state == CFG80211_CONN_SCANNING || wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) { err = 0; goto out; } /* wdev->conn->params.bssid must be set if > SCANNING */ err = cfg80211_mlme_deauth(rdev, wdev->netdev, wdev->conn->params.bssid, NULL, 0, reason, false); out: cfg80211_sme_free(wdev); return err; } /* * code shared for in-device and software SME */ static bool cfg80211_is_all_idle(void) { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; bool is_all_idle = true; /* * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. * Also if there is any other active beaconing interface we * need not issue a disconnect hint and reset any info such * as chan dfs state, etc. 
*/ for_each_rdev(rdev) { wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; } wiphy_unlock(&rdev->wiphy); } return is_all_idle; } static void disconnect_work(struct work_struct *work) { rtnl_lock(); if (cfg80211_is_all_idle()) regulatory_hint_disconnect(); rtnl_unlock(); } DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); static void cfg80211_connect_result_release_bsses(struct wireless_dev *wdev, struct cfg80211_connect_resp_params *cr) { unsigned int link; for_each_valid_link(cr, link) { if (!cr->links[link].bss) continue; cfg80211_unhold_bss(bss_from_pub(cr->links[link].bss)); cfg80211_put_bss(wdev->wiphy, cr->links[link].bss); } } /* * API calls for drivers implementing connect/disconnect and * SME event handling */ /* This method must consume bss one way or another */ void __cfg80211_connect_result(struct net_device *dev, struct cfg80211_connect_resp_params *cr, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; const struct element *country_elem = NULL; const struct element *ssid; const u8 *country_data; u8 country_datalen; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif unsigned int link; const u8 *connected_addr; bool bss_not_found = false; lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; if (cr->valid_links) { if (WARN_ON(!cr->ap_mld_addr)) goto out; for_each_valid_link(cr, link) { if (WARN_ON(!cr->links[link].addr)) goto out; } if (WARN_ON(wdev->connect_keys)) goto out; } wdev->unprot_beacon_reported = 0; nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr, GFP_KERNEL); connected_addr = cr->valid_links ? 
cr->ap_mld_addr : cr->links[0].bssid; #ifdef CONFIG_CFG80211_WEXT if (wextev && !cr->valid_links) { if (cr->req_ie && cr->status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = cr->req_ie_len; wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, cr->req_ie); } if (cr->resp_ie && cr->status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = cr->resp_ie_len; wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, cr->resp_ie); } memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; if (connected_addr && cr->status == WLAN_STATUS_SUCCESS) { memcpy(wrqu.ap_addr.sa_data, connected_addr, ETH_ALEN); memcpy(wdev->wext.prev_bssid, connected_addr, ETH_ALEN); wdev->wext.prev_bssid_valid = true; } wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } #endif if (cr->status == WLAN_STATUS_SUCCESS) { if (!wiphy_to_rdev(wdev->wiphy)->ops->connect) { for_each_valid_link(cr, link) { if (WARN_ON_ONCE(!cr->links[link].bss)) break; } } for_each_valid_link(cr, link) { /* don't do extra lookups for failures */ if (cr->links[link].status != WLAN_STATUS_SUCCESS) continue; if (cr->links[link].bss) continue; cr->links[link].bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->links[link].bssid, wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (!cr->links[link].bss) { bss_not_found = true; break; } cfg80211_hold_bss(bss_from_pub(cr->links[link].bss)); } } cfg80211_wdev_release_bsses(wdev); if (cr->status != WLAN_STATUS_SUCCESS) { kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; wdev->u.client.ssid_len = 0; wdev->conn_owner_nlportid = 0; cfg80211_connect_result_release_bsses(wdev, cr); cfg80211_sme_free(wdev); return; } if (WARN_ON(bss_not_found)) { cfg80211_connect_result_release_bsses(wdev, cr); return; } memset(wdev->links, 0, sizeof(wdev->links)); for_each_valid_link(cr, link) { if (cr->links[link].status == WLAN_STATUS_SUCCESS) continue; cr->valid_links &= ~BIT(link); /* don't require bss pointer for failed links */ if (!cr->links[link].bss) continue; cfg80211_unhold_bss(bss_from_pub(cr->links[link].bss)); cfg80211_put_bss(wdev->wiphy, cr->links[link].bss); } wdev->valid_links = cr->valid_links; for_each_valid_link(cr, link) wdev->links[link].client.current_bss = bss_from_pub(cr->links[link].bss); wdev->connected = true; ether_addr_copy(wdev->u.client.connected_addr, connected_addr); if (cr->valid_links) { for_each_valid_link(cr, link) memcpy(wdev->links[link].addr, cr->links[link].addr, ETH_ALEN); } cfg80211_upload_connect_keys(wdev); rcu_read_lock(); for_each_valid_link(cr, link) { country_elem = ieee80211_bss_get_elem(cr->links[link].bss, WLAN_EID_COUNTRY); if (country_elem) break; } if (!country_elem) { rcu_read_unlock(); return; } country_datalen = country_elem->datalen; country_data = kmemdup(country_elem->data, country_datalen, GFP_ATOMIC); rcu_read_unlock(); if (!country_data) return; regulatory_hint_country_ie(wdev->wiphy, cr->links[link].bss->channel->band, country_data, country_datalen); kfree(country_data); if (!wdev->u.client.ssid_len) { rcu_read_lock(); for_each_valid_link(cr, link) { ssid = ieee80211_bss_get_elem(cr->links[link].bss, WLAN_EID_SSID); if (!ssid || !ssid->datalen) continue; memcpy(wdev->u.client.ssid, ssid->data, ssid->datalen); wdev->u.client.ssid_len = ssid->datalen; break; } rcu_read_unlock(); } return; out: for_each_valid_link(cr, link) cfg80211_put_bss(wdev->wiphy, cr->links[link].bss); } static void cfg80211_update_link_bss(struct wireless_dev *wdev, struct cfg80211_bss 
**bss) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_internal_bss *ibss; if (!*bss) return; ibss = bss_from_pub(*bss); if (list_empty(&ibss->list)) { struct cfg80211_bss *found = NULL, *tmp = *bss; found = cfg80211_get_bss(wdev->wiphy, NULL, (*bss)->bssid, wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (found) { /* The same BSS is already updated so use it * instead, as it has latest info. */ *bss = found; } else { /* Update with BSS provided by driver, it will * be freshly added and ref cnted, we can free * the old one. * * signal_valid can be false, as we are not * expecting the BSS to be found. * * keep the old timestamp to avoid confusion */ cfg80211_bss_update(rdev, ibss, false, ibss->ts); } cfg80211_put_bss(wdev->wiphy, tmp); } } /* Consumes bss object(s) one way or another */ void cfg80211_connect_done(struct net_device *dev, struct cfg80211_connect_resp_params *params, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; u8 *next; size_t link_info_size = 0; unsigned int link; for_each_valid_link(params, link) { cfg80211_update_link_bss(wdev, &params->links[link].bss); link_info_size += params->links[link].bssid ? ETH_ALEN : 0; link_info_size += params->links[link].addr ? ETH_ALEN : 0; } ev = kzalloc(sizeof(*ev) + (params->ap_mld_addr ? ETH_ALEN : 0) + params->req_ie_len + params->resp_ie_len + params->fils.kek_len + params->fils.pmk_len + (params->fils.pmkid ? WLAN_PMKID_LEN : 0) + link_info_size, gfp); if (!ev) { for_each_valid_link(params, link) cfg80211_put_bss(wdev->wiphy, params->links[link].bss); return; } ev->type = EVENT_CONNECT_RESULT; next = ((u8 *)ev) + sizeof(*ev); if (params->ap_mld_addr) { ev->cr.ap_mld_addr = next; memcpy((void *)ev->cr.ap_mld_addr, params->ap_mld_addr, ETH_ALEN); next += ETH_ALEN; } if (params->req_ie_len) { ev->cr.req_ie = next; ev->cr.req_ie_len = params->req_ie_len; memcpy((void *)ev->cr.req_ie, params->req_ie, params->req_ie_len); next += params->req_ie_len; } if (params->resp_ie_len) { ev->cr.resp_ie = next; ev->cr.resp_ie_len = params->resp_ie_len; memcpy((void *)ev->cr.resp_ie, params->resp_ie, params->resp_ie_len); next += params->resp_ie_len; } if (params->fils.kek_len) { ev->cr.fils.kek = next; ev->cr.fils.kek_len = params->fils.kek_len; memcpy((void *)ev->cr.fils.kek, params->fils.kek, params->fils.kek_len); next += params->fils.kek_len; } if (params->fils.pmk_len) { ev->cr.fils.pmk = next; ev->cr.fils.pmk_len = params->fils.pmk_len; memcpy((void *)ev->cr.fils.pmk, params->fils.pmk, params->fils.pmk_len); next += params->fils.pmk_len; } if (params->fils.pmkid) { ev->cr.fils.pmkid = next; memcpy((void *)ev->cr.fils.pmkid, params->fils.pmkid, WLAN_PMKID_LEN); next += WLAN_PMKID_LEN; } ev->cr.fils.update_erp_next_seq_num = params->fils.update_erp_next_seq_num; if (params->fils.update_erp_next_seq_num) ev->cr.fils.erp_next_seq_num = params->fils.erp_next_seq_num; ev->cr.valid_links = params->valid_links; for_each_valid_link(params, link) { if (params->links[link].bss) cfg80211_hold_bss( bss_from_pub(params->links[link].bss)); ev->cr.links[link].bss = params->links[link].bss; if (params->links[link].addr) { ev->cr.links[link].addr = next; memcpy((void *)ev->cr.links[link].addr, params->links[link].addr, ETH_ALEN); next += ETH_ALEN; } if (params->links[link].bssid) { ev->cr.links[link].bssid = next; memcpy((void 
*)ev->cr.links[link].bssid, params->links[link].bssid, ETH_ALEN); next += ETH_ALEN; } } ev->cr.status = params->status; ev->cr.timeout_reason = params->timeout_reason; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_connect_done); /* Consumes bss object one way or another */ void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info) { #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif unsigned int link; const u8 *connected_addr; lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; if (WARN_ON(!wdev->connected)) goto out; if (info->valid_links) { if (WARN_ON(!info->ap_mld_addr)) goto out; for_each_valid_link(info, link) { if (WARN_ON(!info->links[link].addr)) goto out; } } cfg80211_wdev_release_bsses(wdev); for_each_valid_link(info, link) { if (WARN_ON(!info->links[link].bss)) goto out; } memset(wdev->links, 0, sizeof(wdev->links)); wdev->valid_links = info->valid_links; for_each_valid_link(info, link) { cfg80211_hold_bss(bss_from_pub(info->links[link].bss)); wdev->links[link].client.current_bss = bss_from_pub(info->links[link].bss); } connected_addr = info->valid_links ? info->ap_mld_addr : info->links[0].bss->bssid; ether_addr_copy(wdev->u.client.connected_addr, connected_addr); if (info->valid_links) { for_each_valid_link(info, link) memcpy(wdev->links[link].addr, info->links[link].addr, ETH_ALEN); } wdev->unprot_beacon_reported = 0; nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), wdev->netdev, info, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT if (!info->valid_links) { if (info->req_ie) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = info->req_ie_len; wireless_send_event(wdev->netdev, IWEVASSOCREQIE, &wrqu, info->req_ie); } if (info->resp_ie) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = info->resp_ie_len; wireless_send_event(wdev->netdev, IWEVASSOCRESPIE, &wrqu, info->resp_ie); } memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; memcpy(wrqu.ap_addr.sa_data, connected_addr, ETH_ALEN); memcpy(wdev->wext.prev_bssid, connected_addr, ETH_ALEN); wdev->wext.prev_bssid_valid = true; wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL); } #endif return; out: for_each_valid_link(info, link) cfg80211_put_bss(wdev->wiphy, info->links[link].bss); } /* Consumes info->links.bss object(s) one way or another */ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; u8 *next; unsigned int link; size_t link_info_size = 0; bool bss_not_found = false; for_each_valid_link(info, link) { link_info_size += info->links[link].addr ? ETH_ALEN : 0; link_info_size += info->links[link].bssid ? ETH_ALEN : 0; if (info->links[link].bss) continue; info->links[link].bss = cfg80211_get_bss(wdev->wiphy, info->links[link].channel, info->links[link].bssid, wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (!info->links[link].bss) { bss_not_found = true; break; } } if (WARN_ON(bss_not_found)) goto out; ev = kzalloc(sizeof(*ev) + info->req_ie_len + info->resp_ie_len + info->fils.kek_len + info->fils.pmk_len + (info->fils.pmkid ? WLAN_PMKID_LEN : 0) + (info->ap_mld_addr ? 
ETH_ALEN : 0) + link_info_size, gfp); if (!ev) goto out; ev->type = EVENT_ROAMED; next = ((u8 *)ev) + sizeof(*ev); if (info->req_ie_len) { ev->rm.req_ie = next; ev->rm.req_ie_len = info->req_ie_len; memcpy((void *)ev->rm.req_ie, info->req_ie, info->req_ie_len); next += info->req_ie_len; } if (info->resp_ie_len) { ev->rm.resp_ie = next; ev->rm.resp_ie_len = info->resp_ie_len; memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); next += info->resp_ie_len; } if (info->fils.kek_len) { ev->rm.fils.kek = next; ev->rm.fils.kek_len = info->fils.kek_len; memcpy((void *)ev->rm.fils.kek, info->fils.kek, info->fils.kek_len); next += info->fils.kek_len; } if (info->fils.pmk_len) { ev->rm.fils.pmk = next; ev->rm.fils.pmk_len = info->fils.pmk_len; memcpy((void *)ev->rm.fils.pmk, info->fils.pmk, info->fils.pmk_len); next += info->fils.pmk_len; } if (info->fils.pmkid) { ev->rm.fils.pmkid = next; memcpy((void *)ev->rm.fils.pmkid, info->fils.pmkid, WLAN_PMKID_LEN); next += WLAN_PMKID_LEN; } ev->rm.fils.update_erp_next_seq_num = info->fils.update_erp_next_seq_num; if (info->fils.update_erp_next_seq_num) ev->rm.fils.erp_next_seq_num = info->fils.erp_next_seq_num; if (info->ap_mld_addr) { ev->rm.ap_mld_addr = next; memcpy((void *)ev->rm.ap_mld_addr, info->ap_mld_addr, ETH_ALEN); next += ETH_ALEN; } ev->rm.valid_links = info->valid_links; for_each_valid_link(info, link) { ev->rm.links[link].bss = info->links[link].bss; if (info->links[link].addr) { ev->rm.links[link].addr = next; memcpy((void *)ev->rm.links[link].addr, info->links[link].addr, ETH_ALEN); next += ETH_ALEN; } if (info->links[link].bssid) { ev->rm.links[link].bssid = next; memcpy((void *)ev->rm.links[link].bssid, info->links[link].bssid, ETH_ALEN); next += ETH_ALEN; } } spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); return; out: for_each_valid_link(info, link) cfg80211_put_bss(wdev->wiphy, info->links[link].bss); } EXPORT_SYMBOL(cfg80211_roamed); void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT && wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO)) return; if (wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) { if (WARN_ON(!wdev->connected) || WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, peer_addr))) return; } nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, peer_addr, td_bitmap, td_bitmap_len); } void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; if (WARN_ON(!peer_addr)) return; ev = kzalloc(sizeof(*ev) + td_bitmap_len, gfp); if (!ev) return; ev->type = EVENT_PORT_AUTHORIZED; memcpy(ev->pa.peer_addr, peer_addr, ETH_ALEN); ev->pa.td_bitmap = ((u8 *)ev) + sizeof(*ev); ev->pa.td_bitmap_len = td_bitmap_len; memcpy((void *)ev->pa.td_bitmap, td_bitmap, td_bitmap_len); /* * Use the wdev event list so that if there are pending * connected/roamed events, they will be reported first. 
*/ spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_port_authorized); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; cfg80211_wdev_release_bsses(wdev); wdev->connected = false; wdev->u.client.ssid_len = 0; wdev->conn_owner_nlportid = 0; kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); /* stop critical protocol if supported */ if (rdev->ops->crit_proto_stop && rdev->crit_proto_nlportid) { rdev->crit_proto_nlportid = 0; rdev_crit_proto_stop(rdev, wdev); } /* * Delete all the keys ... pairwise keys can't really * exist any more anyway, but default keys might. */ if (rdev->ops->del_key) { int max_key_idx = 5; if (wiphy_ext_feature_isset( wdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION) || wiphy_ext_feature_isset( wdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; for (i = 0; i <= max_key_idx; i++) rdev_del_key(rdev, dev, -1, i, false, NULL); } rdev_set_qos_map(rdev, dev, NULL); #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); wdev->wext.connect.ssid_len = 0; #endif schedule_work(&cfg80211_disconnect_work); cfg80211_schedule_channels_check(wdev); } void cfg80211_disconnected(struct net_device *dev, u16 reason, const u8 *ie, size_t ie_len, bool locally_generated, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; ev = kzalloc(sizeof(*ev) + ie_len, gfp); if (!ev) return; ev->type = EVENT_DISCONNECTED; ev->dc.ie = ((u8 *)ev) + sizeof(*ev); ev->dc.ie_len = ie_len; memcpy((void *)ev->dc.ie, ie, ie_len); ev->dc.reason = reason; ev->dc.locally_generated = locally_generated; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_disconnected); /* * API calls for nl80211/wext compatibility code */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; lockdep_assert_wiphy(wdev->wiphy); /* * If we have an ssid_len, we're trying to connect or are * already connected, so reject a new SSID unless it's the * same (which is the case for re-association.) */ if (wdev->u.client.ssid_len && (wdev->u.client.ssid_len != connect->ssid_len || memcmp(wdev->u.client.ssid, connect->ssid, wdev->u.client.ssid_len))) return -EALREADY; /* * If connected, reject (re-)association unless prev_bssid * matches the current BSSID. 
*/ if (wdev->connected) { if (!prev_bssid) return -EALREADY; if (!ether_addr_equal(prev_bssid, wdev->u.client.connected_addr)) return -ENOTCONN; } /* * Reject if we're in the process of connecting with WEP, * this case isn't very interesting and trying to handle * it would make the code much more complex. */ if (wdev->connect_keys) return -EINPROGRESS; cfg80211_oper_and_ht_capa(&connect->ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); cfg80211_oper_and_vht_capa(&connect->vht_capa_mask, rdev->wiphy.vht_capa_mod_mask); if (connkeys && connkeys->def >= 0) { int idx; u32 cipher; idx = connkeys->def; cipher = connkeys->params[idx].cipher; /* If given a WEP key we may need it for shared key auth */ if (cipher == WLAN_CIPHER_SUITE_WEP40 || cipher == WLAN_CIPHER_SUITE_WEP104) { connect->key_idx = idx; connect->key = connkeys->params[idx].key; connect->key_len = connkeys->params[idx].key_len; /* * If ciphers are not set (e.g. when going through * iwconfig), we have to set them appropriately here. */ if (connect->crypto.cipher_group == 0) connect->crypto.cipher_group = cipher; if (connect->crypto.n_ciphers_pairwise == 0) { connect->crypto.n_ciphers_pairwise = 1; connect->crypto.ciphers_pairwise[0] = cipher; } } } else { if (WARN_ON(connkeys)) return -EINVAL; /* connect can point to wdev->wext.connect which * can hold key data from a previous connection */ connect->key = NULL; connect->key_len = 0; connect->key_idx = 0; } wdev->connect_keys = connkeys; memcpy(wdev->u.client.ssid, connect->ssid, connect->ssid_len); wdev->u.client.ssid_len = connect->ssid_len; wdev->conn_bss_type = connect->pbss ? IEEE80211_BSS_TYPE_PBSS : IEEE80211_BSS_TYPE_ESS; if (!rdev->ops->connect) err = cfg80211_sme_connect(wdev, connect, prev_bssid); else err = rdev_connect(rdev, dev, connect); if (err) { wdev->connect_keys = NULL; /* * This could be reassoc getting refused, don't clear * ssid_len in that case. */ if (!wdev->connected) wdev->u.client.ssid_len = 0; return err; } return 0; } int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err = 0; lockdep_assert_wiphy(wdev->wiphy); kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; wdev->conn_owner_nlportid = 0; if (wdev->conn) err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); else if (wdev->u.client.ssid_len) err = rdev_disconnect(rdev, dev, reason); /* * Clear ssid_len unless we actually were fully connected, * in which case cfg80211_disconnected() will take care of * this later. */ if (!wdev->connected) wdev->u.client.ssid_len = 0; return err; } /* * Used to clean up after the connection / connection attempt owner socket * disconnects */ void cfg80211_autodisconnect_wk(struct work_struct *work) { struct wireless_dev *wdev = container_of(work, struct wireless_dev, disconnect_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); wiphy_lock(wdev->wiphy); if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, wdev->netdev, -1, false); break; case NL80211_IFTYPE_MESH_POINT: cfg80211_leave_mesh(rdev, wdev->netdev); break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: /* * Use disconnect_bssid if still connecting and * ops->disconnect not implemented. Otherwise we can * use cfg80211_disconnect. 
*/ if (rdev->ops->disconnect || wdev->connected) cfg80211_disconnect(rdev, wdev->netdev, WLAN_REASON_DEAUTH_LEAVING, true); else cfg80211_mlme_deauth(rdev, wdev->netdev, wdev->disconnect_bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); break; default: break; } } wiphy_unlock(wdev->wiphy); }
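/*
 * Illustrative sketch (not from the kernel tree): cfg80211_connect_done()
 * above copies every variable-length payload (IEs, FILS material, per-link
 * addresses) into a single kzalloc'd event and advances a 'next' cursor
 * past each blob before queueing the event. The minimal stand-alone C
 * sketch below shows the same single-allocation, bump-cursor layout;
 * struct example_event and example_event_alloc() are assumed names, not
 * kernel APIs.
 */
#include <stdlib.h>
#include <string.h>

struct example_event {
	const unsigned char *req_ie;	/* points into the trailing storage */
	size_t req_ie_len;
	const unsigned char *resp_ie;	/* points into the trailing storage */
	size_t resp_ie_len;
	/* variable-length payloads are stored right after the struct */
};

static struct example_event *example_event_alloc(const unsigned char *req_ie,
						 size_t req_ie_len,
						 const unsigned char *resp_ie,
						 size_t resp_ie_len)
{
	struct example_event *ev;
	unsigned char *next;

	/* one allocation sized for the header plus every trailing blob */
	ev = calloc(1, sizeof(*ev) + req_ie_len + resp_ie_len);
	if (!ev)
		return NULL;

	next = (unsigned char *)ev + sizeof(*ev);
	if (req_ie_len) {
		ev->req_ie = next;
		ev->req_ie_len = req_ie_len;
		memcpy(next, req_ie, req_ie_len);
		next += req_ie_len;	/* bump the cursor past this blob */
	}
	if (resp_ie_len) {
		ev->resp_ie = next;
		ev->resp_ie_len = resp_ie_len;
		memcpy(next, resp_ie, resp_ie_len);
		next += resp_ie_len;
	}
	return ev;	/* a single free(ev) releases the payloads too */
}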
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}

static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
{
	pte_t ptent;

	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		*ptlp = pvmw->ptl;
		return !!pvmw->pte;
	}

	/*
	 * It is important to return the ptl corresponding to pte,
	 * in case *pvmw->pmd changes underneath us; so we need to
	 * return it even when choosing not to lock, in case caller
	 * proceeds to loop over next ptes, and finds a match later.
	 * Though, in most cases, page lock already protects this.
	 */
	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
					  pvmw->address, ptlp);
	if (!pvmw->pte)
		return false;

	ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;

		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such
		 * page is not CPU accessible and thus is mapped as
		 * a special swap entry, nonetheless it still does
		 * count as a valid regular mapping for the page
		 * (and is accounted as such in page maps count).
		 *
		 * So handle this special case as if it was a normal
		 * page mapping ie lock CPU page table and return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
*/ entry = pte_to_swp_entry(ptent); if (!is_device_private_entry(entry) && !is_device_exclusive_entry(entry)) return false; } else if (!pte_present(ptent)) { return false; } pvmw->ptl = *ptlp; spin_lock(pvmw->ptl); return true; } /** * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is * mapped at the @pvmw->pte * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range * for checking * * page_vma_mapped_walk() found a place where pfn range is *potentially* * mapped. check_pte() has to validate this. * * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to * arbitrary page. * * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) * * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) * * Otherwise, return false. * */ static bool check_pte(struct page_vma_mapped_walk *pvmw) { unsigned long pfn; pte_t ptent = ptep_get(pvmw->pte); if (pvmw->flags & PVMW_MIGRATION) { swp_entry_t entry; if (!is_swap_pte(ptent)) return false; entry = pte_to_swp_entry(ptent); if (!is_migration_entry(entry) && !is_device_exclusive_entry(entry)) return false; pfn = swp_offset_pfn(entry); } else if (is_swap_pte(ptent)) { swp_entry_t entry; /* Handle un-addressable ZONE_DEVICE memory */ entry = pte_to_swp_entry(ptent); if (!is_device_private_entry(entry) && !is_device_exclusive_entry(entry)) return false; pfn = swp_offset_pfn(entry); } else { if (!pte_present(ptent)) return false; pfn = pte_pfn(ptent); } return (pfn - pvmw->pfn) < pvmw->nr_pages; } /* Returns true if the two ranges overlap. Careful to not overflow. */ static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw) { if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn) return false; if (pfn > pvmw->pfn + pvmw->nr_pages - 1) return false; return true; } static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) { pvmw->address = (pvmw->address + size) & ~(size - 1); if (!pvmw->address) pvmw->address = ULONG_MAX; } /** * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at * @pvmw->address * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags * must be set. pmd, pte and ptl must be NULL. * * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is * adjusted if needed (for PTE-mapped THPs). * * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in * a loop to find all PTEs that map the THP. * * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry * regardless of which page table level the page is mapped at. @pvmw->pmd is * NULL. * * Returns false if there are no more page table entries for the page in * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped. * * If you need to stop the walk before page_vma_mapped_walk() returned false, * use page_vma_mapped_walk_done(). It will do the housekeeping. 
*/ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) { struct vm_area_struct *vma = pvmw->vma; struct mm_struct *mm = vma->vm_mm; unsigned long end; spinlock_t *ptl; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t pmde; /* The only possible pmd mapping has been handled on last iteration */ if (pvmw->pmd && !pvmw->pte) return not_found(pvmw); if (unlikely(is_vm_hugetlb_page(vma))) { struct hstate *hstate = hstate_vma(vma); unsigned long size = huge_page_size(hstate); /* The only possible mapping was handled on last iteration */ if (pvmw->pte) return not_found(pvmw); /* * All callers that get here will already hold the * i_mmap_rwsem. Therefore, no additional locks need to be * taken before calling hugetlb_walk(). */ pvmw->pte = hugetlb_walk(vma, pvmw->address, size); if (!pvmw->pte) return false; pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte); if (!check_pte(pvmw)) return not_found(pvmw); return true; } end = vma_address_end(pvmw); if (pvmw->pte) goto next_pte; restart: do { pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) { step_forward(pvmw, PGDIR_SIZE); continue; } p4d = p4d_offset(pgd, pvmw->address); if (!p4d_present(*p4d)) { step_forward(pvmw, P4D_SIZE); continue; } pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) { step_forward(pvmw, PUD_SIZE); continue; } pvmw->pmd = pmd_offset(pud, pvmw->address); /* * Make sure the pmd value isn't cached in a register by the * compiler and used as a stale value after we've observed a * subsequent update. */ pmde = pmdp_get_lockless(pvmw->pmd); if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) || (pmd_present(pmde) && pmd_devmap(pmde))) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); pmde = *pvmw->pmd; if (!pmd_present(pmde)) { swp_entry_t entry; if (!thp_migration_supported() || !(pvmw->flags & PVMW_MIGRATION)) return not_found(pvmw); entry = pmd_to_swp_entry(pmde); if (!is_migration_entry(entry) || !check_pmd(swp_offset_pfn(entry), pvmw)) return not_found(pvmw); return true; } if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) { if (pvmw->flags & PVMW_MIGRATION) return not_found(pvmw); if (!check_pmd(pmd_pfn(pmde), pvmw)) return not_found(pvmw); return true; } /* THP pmd was split under us: handle on pte level */ spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } else if (!pmd_present(pmde)) { /* * If PVMW_SYNC, take and drop THP pmd lock so that we * cannot return prematurely, while zap_huge_pmd() has * cleared *pmd but not decremented compound_mapcount(). */ if ((pvmw->flags & PVMW_SYNC) && thp_vma_suitable_order(vma, pvmw->address, PMD_ORDER) && (pvmw->nr_pages >= HPAGE_PMD_NR)) { spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); spin_unlock(ptl); } step_forward(pvmw, PMD_SIZE); continue; } if (!map_pte(pvmw, &ptl)) { if (!pvmw->pte) goto restart; goto next_pte; } this_pte: if (check_pte(pvmw)) return true; next_pte: do { pvmw->address += PAGE_SIZE; if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? 
*/ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { if (pvmw->ptl) { spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } pte_unmap(pvmw->pte); pvmw->pte = NULL; goto restart; } pvmw->pte++; } while (pte_none(ptep_get(pvmw->pte))); if (!pvmw->ptl) { pvmw->ptl = ptl; spin_lock(pvmw->ptl); } goto this_pte; } while (pvmw->address < end); return false; } /** * page_mapped_in_vma - check whether a page is really mapped in a VMA * @page: the page to test * @vma: the VMA to test * * Returns 1 if the page is mapped into the page tables of the VMA, 0 * if the page is not mapped into the page tables of this VMA. Only * valid for normal file or anonymous VMAs. */ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) { struct page_vma_mapped_walk pvmw = { .pfn = page_to_pfn(page), .nr_pages = 1, .vma = vma, .flags = PVMW_SYNC, }; pvmw.address = vma_address(page, vma); if (pvmw.address == -EFAULT) return 0; if (!page_vma_mapped_walk(&pvmw)) return 0; page_vma_mapped_walk_done(&pvmw); return 1; }
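/*
 * Illustrative sketch (not from the kernel tree): the kernel-doc above says
 * that a PTE-mapped THP is visited by calling page_vma_mapped_walk() in a
 * loop until it returns false, and that an early exit must go through
 * page_vma_mapped_walk_done(). A hypothetical caller, modelled on
 * page_mapped_in_vma() above, could look like this; example_walk_vma() and
 * the example_visit_pte callback are assumed names, not kernel APIs.
 */
static bool example_walk_vma(struct page *page, struct vm_area_struct *vma,
			     bool (*example_visit_pte)(pte_t *pte))
{
	struct page_vma_mapped_walk pvmw = {
		.pfn = page_to_pfn(page),
		.nr_pages = 1,
		.vma = vma,
	};

	pvmw.address = vma_address(page, vma);
	if (pvmw.address == -EFAULT)
		return false;

	while (page_vma_mapped_walk(&pvmw)) {
		/*
		 * Here pvmw.ptl is held; pvmw.pte is set for a PTE mapping,
		 * while a PMD mapping leaves pvmw.pte NULL and sets pvmw.pmd.
		 */
		if (pvmw.pte && !example_visit_pte(pvmw.pte)) {
			/* stopping early: let the walk drop its lock/map */
			page_vma_mapped_walk_done(&pvmw);
			return false;
		}
	}
	/* a false return means the walk already cleaned up after itself */
	return true;
}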
// SPDX-License-Identifier: GPL-2.0-only
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>

static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
	if (seq == s_win)
		return true;
	if (after(end_seq, s_win) && before(seq, e_win))
		return true;
	return seq == e_win && seq == end_seq;
}

static enum tcp_tw_status
tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
				  const struct sk_buff *skb, int mib_idx)
{
	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

	if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx,
				  &tcptw->tw_last_oow_ack_time)) {
		/* Send ACK. Note, we do not put the bucket,
		 * it will be released by caller.
		 */
		return TCP_TW_ACK;
	}

	/* We are rate-limiting, so just release the tw sock and drop skb. */
	inet_twsk_put(tw);
	return TCP_TW_SUCCESS;
}

static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
{
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao;

	ao = rcu_dereference(tcptw->ao_info);
	if (unlikely(ao && seq < tcptw->tw_rcv_nxt))
		WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1);
#endif
	tcptw->tw_rcv_nxt = seq;
}

/*
 * * Main purpose of TIME-WAIT state is to close connection gracefully,
 *   when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
 *   (and, probably, tail of data) and one or more our ACKs are lost.
 * * What is TIME-WAIT timeout? It is associated with maximal packet
 *   lifetime in the internet, which results in wrong conclusion, that
 *   it is set to catch "old duplicate segments" wandering out of their path.
 *   It is not quite correct. This timeout is calculated so that it exceeds
 *   maximal retransmission timeout enough to allow to lose one (or more)
 *   segments sent by peer and our ACKs. This time may be calculated from RTO.
 * * When TIME-WAIT socket receives RST, it means that another end
 *   finally closed and we are allowed to kill TIME-WAIT too.
 * * Second purpose of TIME-WAIT is catching old duplicate segments.
 *   Well, certainly it is pure paranoia, but if we load TIME-WAIT
 *   with this semantics, we MUST NOT kill TIME-WAIT state with RSTs.
 * * If we invented some more clever way to catch duplicates
 *   (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs.
 *
 * The algorithm below is based on FORMAL INTERPRETATION of RFCs.
 * When you compare it to RFCs, please, read section SEGMENT ARRIVES
 * from the very beginning.
 *
 * NOTE. With recycling (and later with fin-wait-2) TW bucket
 * is _not_ stateless. It means, that strictly speaking we must
 * spinlock it. I do not want!
Well, probability of misbehaviour * is ridiculously low and, seems, we could use some mb() tricks * to avoid misread sequence numbers, states etc. --ANK * * We don't need to initialize tmp_out.sack_ok as we don't use the results */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } if (tw->tw_substate == TCP_FIN_WAIT2) { /* Just repeat all the checks of tcp_rcv_state_process() */ /* Out of window, send ACK */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt, tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); if (th->rst) goto kill; if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) return TCP_TW_RST; /* Dup ACK? */ if (!th->ack || !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { inet_twsk_put(tw); return TCP_TW_SUCCESS; } /* New data or FIN. If new data arrive after half-duplex close, * reset. */ if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) return TCP_TW_RST; /* FIN arrived, enter true time-wait state. */ tw->tw_substate = TCP_TIME_WAIT; twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent_stamp = ktime_get_seconds(); tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } /* * Now real TIME-WAIT state. * * RFC 1122: * "When a connection is [...] on TIME-WAIT state [...] * [a TCP] MAY accept a new SYN from the remote TCP to * reopen the connection directly, if it: * * (1) assigns its initial sequence number for the new * connection to be larger than the largest sequence * number it used on the previous connection incarnation, * and * * (2) returns to TIME-WAIT state if the SYN turns out * to be an old duplicate". */ if (!paws_reject && (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { /* In window segment, it may be only reset or bare ack. */ if (th->rst) { /* This is TIME_WAIT assassination, in two flavors. * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; } } else { inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); } if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; tcptw->tw_ts_recent_stamp = ktime_get_seconds(); } inet_twsk_put(tw); return TCP_TW_SUCCESS; } /* Out of window segment. All the segments are ACKed immediately. The only exception is new SYN. We accept it, if it is not old duplicate and we are not in danger to be killed by delayed old duplicates. RFC check is that it has newer sequence number works at rates <40Mbit/sec. However, if paws works, it is reliable AND even more, we even may relax silly seq space cutoff. 
RED-PEN: we violate main RFC requirement, if this SYN will appear old duplicate (i.e. we receive RST in reply to SYN-ACK), we must return socket to time-wait state. It is not good, but not fatal yet. */ if (th->syn && !th->rst && !th->ack && !paws_reject && (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || (tmp_opt.saw_tstamp && (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; TCP_SKB_CB(skb)->tcp_tw_isn = isn; return TCP_TW_SYN; } if (paws_reject) __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. * * If it is ACKless SYN it may be both old duplicate * and new good SYN with random sequence number <rcv_nxt. * Do not reschedule in the last case. */ if (paws_reject || th->ack) inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } inet_twsk_put(tw); return TCP_TW_SUCCESS; } EXPORT_SYMBOL(tcp_timewait_state_process); static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) { #ifdef CONFIG_TCP_MD5SIG const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; /* * The timewait bucket does not have the key DB from the * sock structure. We just make a quick copy of the * md5 key being used (if indeed we are using one) * so the timewait ack generating code has the key. */ tcptw->tw_md5_key = NULL; if (!static_branch_unlikely(&tcp_md5_needed.key)) return; key = tp->af_specific->md5_lookup(sk, sk); if (key) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (!tcptw->tw_md5_key) return; if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) goto out_free; tcp_md5_add_sigpool(); } return; out_free: WARN_ON_ONCE(1); kfree(tcptw->tw_md5_key); tcptw->tw_md5_key = NULL; #endif } /* * Move a socket to time-wait or dead fin-wait-2 state. */ void tcp_time_wait(struct sock *sk, int state, int timeo) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_timewait_sock *tw; tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_mark = sk->sk_mark; tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; tw->tw_usec_ts = tp->tcp_usec_ts; tcptw->tw_last_oow_ack_time = 0; tcptw->tw_tx_delay = tp->tcp_tx_delay; tw->tw_txhash = sk->sk_txhash; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); tw->tw_ipv6only = sk->sk_ipv6only; } #endif tcp_time_wait_init(sk, tcptw); tcp_ao_time_wait(tcptw, tp); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; if (state == TCP_TIME_WAIT) timeo = TCP_TIMEWAIT_LEN; /* tw_timer is pinned, so we need to make sure BH are disabled * in following section, otherwise timer handler could run before * we complete the initialization. 
*/ local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than * non-graceful socket closings. */ NET_INC_STATS(net, LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); tcp_done(sk); } EXPORT_SYMBOL(tcp_time_wait); #ifdef CONFIG_TCP_MD5SIG static void tcp_md5_twsk_free_rcu(struct rcu_head *head) { struct tcp_md5sig_key *key; key = container_of(head, struct tcp_md5sig_key, rcu); kfree(key); static_branch_slow_dec_deferred(&tcp_md5_needed); tcp_md5_release_sigpool(); } #endif void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG if (static_branch_unlikely(&tcp_md5_needed.key)) { struct tcp_timewait_sock *twsk = tcp_twsk(sk); if (twsk->tw_md5_key) call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu); } #endif tcp_ao_destroy_sock(sk, true); } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); void tcp_twsk_purge(struct list_head *net_exit_list, int family) { bool purged_once = false; struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { if (net->ipv4.tcp_death_row.hashinfo->pernet) { /* Even if tw_refcount == 1, we must clean up kernel reqsk */ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); } else if (!purged_once) { inet_twsk_purge(&tcp_hashinfo, family); purged_once = true; } } } EXPORT_SYMBOL_GPL(tcp_twsk_purge); /* Warning : This function is called without sk_listener being locked. * Be sure to read socket fields once, as their value could change under us. */ void tcp_openreq_init_rwin(struct request_sock *req, const struct sock *sk_listener, const struct dst_entry *dst) { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk_listener); int full_space = tcp_full_space(sk_listener); u32 window_clamp; __u8 rcv_wscale; u32 rcv_wnd; int mss; mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); window_clamp = READ_ONCE(tp->window_clamp); /* Set this up on the first call only */ req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* limit the window selection if the user enforce a smaller rx buffer */ if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK && (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req); if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); else if (full_space < rcv_wnd * mss) full_space = rcv_wnd * mss; /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(sk_listener, full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, rcv_wnd); ireq->rcv_wscale = rcv_wscale; } EXPORT_SYMBOL(tcp_openreq_init_rwin); static void tcp_ecn_openreq_child(struct tcp_sock *tp, const struct request_sock *req) { tp->ecn_flags = inet_rsk(req)->ecn_ok ? 
TCP_ECN_OK : 0; } void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; if (ca_key != TCP_CA_UNSPEC) { const struct tcp_congestion_ops *ca; rcu_read_lock(); ca = tcp_ca_find_key(ca_key); if (likely(ca && bpf_try_module_get(ca, ca->owner))) { icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); icsk->icsk_ca_ops = ca; ca_got_dst = true; } rcu_read_unlock(); } /* If no valid choice made yet, assign current system default ca. */ if (!ca_got_dst && (!icsk->icsk_ca_setsockopt || !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner))) tcp_assign_congestion_control(sk); tcp_set_ca_state(sk, TCP_CA_Open); } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, struct tcp_sock *newtp) { #if IS_ENABLED(CONFIG_SMC) struct inet_request_sock *ireq; if (static_branch_unlikely(&tcp_have_smc)) { ireq = inet_rsk(req); if (oldtp->syn_smc && !ireq->smc_ok) newtp->syn_smc = 0; } #endif } /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * * Actually, we could lots of memory writes here. tp of listening * socket contains all necessary default parameters. */ struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, struct sk_buff *skb) { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; const struct tcp_sock *oldtp; struct tcp_sock *newtp; u32 seq; #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key; #endif if (!newsk) return NULL; newicsk = inet_csk(newsk); newtp = tcp_sk(newsk); oldtp = tcp_sk(sk); smc_check_reset_syn_req(oldtp, req, newtp); /* Now setup tcp_sock */ newtp->pred_flags = 0; seq = treq->rcv_isn + 1; newtp->rcv_wup = seq; WRITE_ONCE(newtp->copied_seq, seq); WRITE_ONCE(newtp->rcv_nxt, seq); newtp->segs_in = 1; seq = treq->snt_isn + 1; newtp->snd_sml = newtp->snd_una = seq; WRITE_ONCE(newtp->snd_nxt, seq); newtp->snd_up = seq; INIT_LIST_HEAD(&newtp->tsq_node); INIT_LIST_HEAD(&newtp->tsorted_sent_queue); tcp_init_wl(newtp, treq->rcv_isn); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_ack.lrcvtime = tcp_jiffies32; newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = READ_ONCE(treq->txhash); newtp->total_retrans = req->num_retrans; tcp_init_xmit_timers(newsk); WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; newtp->rx_opt.sack_ok = ireq->sack_ok; newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; newtp->rx_opt.wscale_ok = ireq->wscale_ok; if (newtp->rx_opt.wscale_ok) { newtp->rx_opt.snd_wscale = ireq->snd_wscale; newtp->rx_opt.rcv_wscale = ireq->rcv_wscale; } else { newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; newtp->window_clamp = min(newtp->window_clamp, 65535U); } newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale; newtp->max_window = newtp->snd_wnd; if (newtp->rx_opt.tstamp_ok) { newtp->tcp_usec_ts = treq->req_usec_ts; newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); 
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { newtp->tcp_usec_ts = 0; newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } if (req->num_timeout) { newtp->total_rto = req->num_timeout; newtp->undo_marker = treq->snt_isn; if (newtp->tcp_usec_ts) { newtp->retrans_stamp = treq->snt_synack; newtp->total_rto_time = (u32)(tcp_clock_us() - newtp->retrans_stamp) / USEC_PER_MSEC; } else { newtp->retrans_stamp = div_u64(treq->snt_synack, USEC_PER_SEC / TCP_TS_HZ); newtp->total_rto_time = tcp_clock_ms() - newtp->retrans_stamp; } newtp->total_rto_recoveries = 1; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ #endif #ifdef CONFIG_TCP_AO newtp->ao_info = NULL; ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1); if (ao_key) newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); newtp->bpf_chg_cc_inprogress = 0; tcp_bpf_clone(sk, newsk); __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); return newsk; } EXPORT_SYMBOL(tcp_create_openreq_child); /* * Process an incoming packet for SYN_RECV sockets represented as a * request_sock. Normally sk is the listener socket but for TFO it * points to the child socket. * * XXX (TFO) - The current impl contains a special check for ack * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? * * We don't need to initialize tmp_opt.sack_ok as we don't use the results * * Note: If @fastopen is true, this can be called from process context. * Otherwise, this is from BH context. */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *req_stolen) { struct tcp_options_received tmp_opt; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; bool own_req; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = READ_ONCE(req->ts_recent); if (tmp_opt.rcv_tsecr) tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; /* We do not store true stamp, but it is not required, * it can be estimated (approximately) * from another data. */ tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } /* Check for pure retransmitted SYN. */ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN && !paws_reject) { /* * RFC793 draws (Incorrectly! It was fixed in RFC1122) * this case on figure 6 and figure 8, but formal * protocol description says NOTHING. * To be more exact, it says that we should send ACK, * because this segment (at least, if it has no data) * is out of window. * * CONCLUSION: RFC793 (even with RFC1122) DOES NOT * describe SYN-RECV state. All the description * is wrong, we cannot believe to it and should * rely only on common sense and implementation * experience. * * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. * * Note that even if there is new data in the SYN packet * they will be thrown away too. 
* * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. */ if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time) && !inet_rtx_syn_ack(sk, req)) { unsigned long expires = jiffies; expires += reqsk_timeout(req, TCP_RTO_MAX); if (!fastopen) mod_timer_pending(&req->rsk_timer, expires); else req->rsk_timer.expires = expires; } return NULL; } /* Further reproduces section "SEGMENT ARRIVES" for state SYN-RECEIVED of RFC793. It is broken, however, it does not work only when SYNs are crossed. You would think that SYN crossing is impossible here, since we should have a SYN_SENT socket (from connect()) on our end, but this is not true if the crossed SYNs were sent to both ends by a malicious third party. We must defend against this, and to do that we first verify the ACK (as per RFC793, page 36) and reset if it is invalid. Is this a true full defense? To convince ourselves, let us consider a way in which the ACK test can still pass in this 'malicious crossed SYNs' case. Malicious sender sends identical SYNs (and thus identical sequence numbers) to both A and B: A: gets SYN, seq=7 B: gets SYN, seq=7 By our good fortune, both A and B select the same initial send sequence number of seven :-) A: sends SYN|ACK, seq=7, ack_seq=8 B: sends SYN|ACK, seq=7, ack_seq=8 So we are now A eating this SYN|ACK, ACK test passes. So does sequence test, SYN is truncated, and thus we consider it a bare ACK. If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this bare ACK. Otherwise, we create an established connection. Both ends (listening sockets) accept the new incoming connection and try to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. But generally, we should (RFC lies!) to accept ACK from SYNACK both here and in tcp_rcv_state_process(). tcp_rcv_state_process() does not, hence, we do not too. Note that the case is absolutely generic: we cannot optimize anything here without violating protocol. All the checks must be made before attempt to create socket. */ /* RFC793 page 36: "If the connection is in any non-synchronized state ... * and the incoming segment acknowledges something not yet * sent (the segment carries an unacceptable ACK) ... * a reset is sent." * * Invalid ACK: reset will be sent by listening socket. * Note that the ACK validity check for a Fast Open socket is done * elsewhere and is checked directly against the child socket rather * than req because user data may have been sent out. */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which * is essentially ACK extension and too early or too late values * should cause reset in unsynchronized states. */ /* RFC793: "first check sequence number". */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST) && !tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } /* In sequence, PAWS is OK. */ /* TODO: We probably should defer ts_recent change once * we take ownership of @req. 
*/ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { /* Truncate SYN, it is out of window starting at tcp_rsk(req)->rcv_isn + 1. */ flg &= ~TCP_FLAG_SYN; } /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. * * XXX (TFO) - if we ever allow "data after SYN", the * following check needs to be removed. */ if (!(flg & TCP_FLAG_ACK)) return NULL; /* For Fast Open no more processing is needed (sk is the * child socket). */ if (fastopen) return sk; /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all * the tests. THIS SEGMENT MUST MOVE SOCKET TO * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); if (!child) goto listen_overflow; if (own_req && rsk_drop_req(req)) { reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); return child; } sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); *req_stolen = !own_req; return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) { inet_rsk(req)->acked = 1; return NULL; } embryonic_reset: if (!(flg & TCP_FLAG_RST)) { /* Received a bad SYN pkt - for TFO We try not to reset * the local connection unless it's really necessary to * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. */ req->rsk_ops->send_reset(sk, skb); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); tcp_reset(sk, skb); } if (!fastopen) { bool unlinked = inet_csk_reqsk_queue_drop(sk, req); if (unlinked) __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); *req_stolen = !unlinked; } return NULL; } EXPORT_SYMBOL(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. * * For the vast majority of cases child->sk_state will be TCP_SYN_RECV * when entering. But other states are possible due to a race condition * where after __inet_lookup_established() fails but before the listener * locked is obtained, other packets cause the same connection to * be created. */ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) __releases(&((child)->sk_lock.slock)) { enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; int state = child->sk_state; /* record sk_napi_id and sk_rx_queue_mapping of child. 
*/ sk_mark_napi_id_set(child, skb); tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { reason = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening * socket does not protect us more. */ __sk_add_backlog(child, skb); } bh_unlock_sock(child); sock_put(child); return reason; } EXPORT_SYMBOL(tcp_child_process);
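/*
 * Illustrative sketch (not from the kernel tree): tcp_time_wait() above
 * floors the FIN-WAIT-2 timeout at (rto << 2) - (rto >> 1), i.e.
 * 4*RTO - RTO/2 = 3.5*RTO, before forcing real TIME-WAIT to
 * TCP_TIMEWAIT_LEN. The tiny stand-alone check below only spells out that
 * shift arithmetic; the RTO value of 200 jiffies is an assumption made for
 * the example.
 */
#include <assert.h>

static unsigned int example_tw_min_timeout(unsigned int rto)
{
	/* same expression as in tcp_time_wait(): 3.5 times the RTO */
	return (rto << 2) - (rto >> 1);
}

static void example_tw_timeout_check(void)
{
	unsigned int rto = 200;	/* assumed RTO, in jiffies */

	/* 4 * 200 - 200 / 2 = 800 - 100 = 700 = 3.5 * 200 */
	assert(example_tw_min_timeout(rto) == 700);
}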
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook */
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>

#include "bpf_lru_list.h"

#define LOCAL_FREE_TARGET		(128)
#define LOCAL_NR_SCANS			LOCAL_FREE_TARGET

#define PERCPU_FREE_TARGET		(4)
#define PERCPU_NR_SCANS			PERCPU_FREE_TARGET

/* Helpers to get the local list index */
#define LOCAL_LIST_IDX(t)	((t) - BPF_LOCAL_LIST_T_OFFSET)
#define LOCAL_FREE_LIST_IDX	LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING) #define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET) static int get_next_cpu(int cpu) { cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_possible_mask); return cpu; } /* Local list helpers */ static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l) { return &loc_l->lists[LOCAL_FREE_LIST_IDX]; } static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) { return &loc_l->lists[LOCAL_PENDING_LIST_IDX]; } /* bpf_lru_node helpers */ static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) { return READ_ONCE(node->ref); } static void bpf_lru_node_clear_ref(struct bpf_lru_node *node) { WRITE_ONCE(node->ref, 0); } static void bpf_lru_list_count_inc(struct bpf_lru_list *l, enum bpf_lru_list_type type) { if (type < NR_BPF_LRU_LIST_COUNT) l->counts[type]++; } static void bpf_lru_list_count_dec(struct bpf_lru_list *l, enum bpf_lru_list_type type) { if (type < NR_BPF_LRU_LIST_COUNT) l->counts[type]--; } static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, struct bpf_lru_node *node, struct list_head *free_list, enum bpf_lru_list_type tgt_free_type) { if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) return; /* If the removing node is the next_inactive_rotation candidate, * move the next_inactive_rotation pointer also. */ if (&node->list == l->next_inactive_rotation) l->next_inactive_rotation = l->next_inactive_rotation->prev; bpf_lru_list_count_dec(l, node->type); node->type = tgt_free_type; list_move(&node->list, free_list); } /* Move nodes from local list to the LRU list */ static void __bpf_lru_node_move_in(struct bpf_lru_list *l, struct bpf_lru_node *node, enum bpf_lru_list_type tgt_type) { if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) || WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) return; bpf_lru_list_count_inc(l, tgt_type); node->type = tgt_type; bpf_lru_node_clear_ref(node); list_move(&node->list, &l->lists[tgt_type]); } /* Move nodes between or within active and inactive list (like * active to inactive, inactive to active or tail of active back to * the head of active). */ static void __bpf_lru_node_move(struct bpf_lru_list *l, struct bpf_lru_node *node, enum bpf_lru_list_type tgt_type) { if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) || WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) return; if (node->type != tgt_type) { bpf_lru_list_count_dec(l, node->type); bpf_lru_list_count_inc(l, tgt_type); node->type = tgt_type; } bpf_lru_node_clear_ref(node); /* If the moving node is the next_inactive_rotation candidate, * move the next_inactive_rotation pointer also. */ if (&node->list == l->next_inactive_rotation) l->next_inactive_rotation = l->next_inactive_rotation->prev; list_move(&node->list, &l->lists[tgt_type]); } static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) { return l->counts[BPF_LRU_LIST_T_INACTIVE] < l->counts[BPF_LRU_LIST_T_ACTIVE]; } /* Rotate the active list: * 1. Start from tail * 2. If the node has the ref bit set, it will be rotated * back to the head of active list with the ref bit cleared. * Give this node one more chance to survive in the active list. * 3. If the ref bit is not set, move it to the head of the * inactive list. * 4. 
It will at most scan nr_scans nodes */ static void __bpf_lru_list_rotate_active(struct bpf_lru *lru, struct bpf_lru_list *l) { struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE]; struct bpf_lru_node *node, *tmp_node, *first_node; unsigned int i = 0; first_node = list_first_entry(active, struct bpf_lru_node, list); list_for_each_entry_safe_reverse(node, tmp_node, active, list) { if (bpf_lru_node_is_ref(node)) __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); else __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); if (++i == lru->nr_scans || node == first_node) break; } } /* Rotate the inactive list. It starts from the next_inactive_rotation * 1. If the node has ref bit set, it will be moved to the head * of active list with the ref bit cleared. * 2. If the node does not have ref bit set, it will leave it * at its current location (i.e. do nothing) so that it can * be considered during the next inactive_shrink. * 3. It will at most scan nr_scans nodes */ static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru, struct bpf_lru_list *l) { struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; struct list_head *cur, *last, *next = inactive; struct bpf_lru_node *node; unsigned int i = 0; if (list_empty(inactive)) return; last = l->next_inactive_rotation->next; if (last == inactive) last = last->next; cur = l->next_inactive_rotation; while (i < lru->nr_scans) { if (cur == inactive) { cur = cur->prev; continue; } node = list_entry(cur, struct bpf_lru_node, list); next = cur->prev; if (bpf_lru_node_is_ref(node)) __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); if (cur == last) break; cur = next; i++; } l->next_inactive_rotation = next; } /* Shrink the inactive list. It starts from the tail of the * inactive list and only move the nodes without the ref bit * set to the designated free list. */ static unsigned int __bpf_lru_list_shrink_inactive(struct bpf_lru *lru, struct bpf_lru_list *l, unsigned int tgt_nshrink, struct list_head *free_list, enum bpf_lru_list_type tgt_free_type) { struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; struct bpf_lru_node *node, *tmp_node; unsigned int nshrinked = 0; unsigned int i = 0; list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { if (bpf_lru_node_is_ref(node)) { __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); } else if (lru->del_from_htab(lru->del_arg, node)) { __bpf_lru_node_move_to_free(l, node, free_list, tgt_free_type); if (++nshrinked == tgt_nshrink) break; } if (++i == lru->nr_scans) break; } return nshrinked; } /* 1. Rotate the active list (if needed) * 2. Always rotate the inactive list */ static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l) { if (bpf_lru_list_inactive_low(l)) __bpf_lru_list_rotate_active(lru, l); __bpf_lru_list_rotate_inactive(lru, l); } /* Calls __bpf_lru_list_shrink_inactive() to shrink some * ref-bit-cleared nodes and move them to the designated * free list. * * If it cannot get a free node after calling * __bpf_lru_list_shrink_inactive(). It will just remove * one node from either inactive or active list without * honoring the ref-bit. It prefers inactive list to active * list in this situation. 
*/ static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru, struct bpf_lru_list *l, unsigned int tgt_nshrink, struct list_head *free_list, enum bpf_lru_list_type tgt_free_type) { struct bpf_lru_node *node, *tmp_node; struct list_head *force_shrink_list; unsigned int nshrinked; nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink, free_list, tgt_free_type); if (nshrinked) return nshrinked; /* Do a force shrink by ignoring the reference bit */ if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE])) force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE]; else force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE]; list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list, list) { if (lru->del_from_htab(lru->del_arg, node)) { __bpf_lru_node_move_to_free(l, node, free_list, tgt_free_type); return 1; } } return 0; } /* Flush the nodes from the local pending list to the LRU list */ static void __local_list_flush(struct bpf_lru_list *l, struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node, *tmp_node; list_for_each_entry_safe_reverse(node, tmp_node, local_pending_list(loc_l), list) { if (bpf_lru_node_is_ref(node)) __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE); else __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_INACTIVE); } } static void bpf_lru_list_push_free(struct bpf_lru_list *l, struct bpf_lru_node *node) { unsigned long flags; if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) return; raw_spin_lock_irqsave(&l->lock, flags); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); raw_spin_unlock_irqrestore(&l->lock, flags); } static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) { struct bpf_lru_list *l = &lru->common_lru.lru_list; struct bpf_lru_node *node, *tmp_node; unsigned int nfree = 0; raw_spin_lock(&l->lock); __local_list_flush(l, loc_l); __bpf_lru_list_rotate(lru, l); list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE], list) { __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); if (++nfree == LOCAL_FREE_TARGET) break; } if (nfree < LOCAL_FREE_TARGET) __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); raw_spin_unlock(&l->lock); } static void __local_list_add_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l, int cpu, struct bpf_lru_node *node, u32 hash) { *(u32 *)((void *)node + lru->hash_offset) = hash; node->cpu = cpu; node->type = BPF_LRU_LOCAL_LIST_T_PENDING; bpf_lru_node_clear_ref(node); list_add(&node->list, local_pending_list(loc_l)); } static struct bpf_lru_node * __local_list_pop_free(struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node; node = list_first_entry_or_null(local_free_list(loc_l), struct bpf_lru_node, list); if (node) list_del(&node->list); return node; } static struct bpf_lru_node * __local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node; bool force = false; ignore_ref: /* Get from the tail (i.e. older element) of the pending list. 
*/ list_for_each_entry_reverse(node, local_pending_list(loc_l), list) { if ((!bpf_lru_node_is_ref(node) || force) && lru->del_from_htab(lru->del_arg, node)) { list_del(&node->list); return node; } } if (!force) { force = true; goto ignore_ref; } return NULL; } static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, u32 hash) { struct list_head *free_list; struct bpf_lru_node *node = NULL; struct bpf_lru_list *l; unsigned long flags; int cpu = raw_smp_processor_id(); l = per_cpu_ptr(lru->percpu_lru, cpu); raw_spin_lock_irqsave(&l->lock, flags); __bpf_lru_list_rotate(lru, l); free_list = &l->lists[BPF_LRU_LIST_T_FREE]; if (list_empty(free_list)) __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list, BPF_LRU_LIST_T_FREE); if (!list_empty(free_list)) { node = list_first_entry(free_list, struct bpf_lru_node, list); *(u32 *)((void *)node + lru->hash_offset) = hash; bpf_lru_node_clear_ref(node); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); } raw_spin_unlock_irqrestore(&l->lock, flags); return node; } static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, u32 hash) { struct bpf_lru_locallist *loc_l, *steal_loc_l; struct bpf_common_lru *clru = &lru->common_lru; struct bpf_lru_node *node; int steal, first_steal; unsigned long flags; int cpu = raw_smp_processor_id(); loc_l = per_cpu_ptr(clru->local_list, cpu); raw_spin_lock_irqsave(&loc_l->lock, flags); node = __local_list_pop_free(loc_l); if (!node) { bpf_lru_list_pop_free_to_local(lru, loc_l); node = __local_list_pop_free(loc_l); } if (node) __local_list_add_pending(lru, loc_l, cpu, node, hash); raw_spin_unlock_irqrestore(&loc_l->lock, flags); if (node) return node; /* No free nodes found from the local free list and * the global LRU list. * * Steal from the local free/pending list of the * current CPU and remote CPU in RR. It starts * with the loc_l->next_steal CPU. 
*/ first_steal = loc_l->next_steal; steal = first_steal; do { steal_loc_l = per_cpu_ptr(clru->local_list, steal); raw_spin_lock_irqsave(&steal_loc_l->lock, flags); node = __local_list_pop_free(steal_loc_l); if (!node) node = __local_list_pop_pending(lru, steal_loc_l); raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags); steal = get_next_cpu(steal); } while (!node && steal != first_steal); loc_l->next_steal = steal; if (node) { raw_spin_lock_irqsave(&loc_l->lock, flags); __local_list_add_pending(lru, loc_l, cpu, node, hash); raw_spin_unlock_irqrestore(&loc_l->lock, flags); } return node; } struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash) { if (lru->percpu) return bpf_percpu_lru_pop_free(lru, hash); else return bpf_common_lru_pop_free(lru, hash); } static void bpf_common_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { u8 node_type = READ_ONCE(node->type); unsigned long flags; if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) || WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE)) return; if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) { struct bpf_lru_locallist *loc_l; loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); raw_spin_lock_irqsave(&loc_l->lock, flags); if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) { raw_spin_unlock_irqrestore(&loc_l->lock, flags); goto check_lru_list; } node->type = BPF_LRU_LOCAL_LIST_T_FREE; bpf_lru_node_clear_ref(node); list_move(&node->list, local_free_list(loc_l)); raw_spin_unlock_irqrestore(&loc_l->lock, flags); return; } check_lru_list: bpf_lru_list_push_free(&lru->common_lru.lru_list, node); } static void bpf_percpu_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { struct bpf_lru_list *l; unsigned long flags; l = per_cpu_ptr(lru->percpu_lru, node->cpu); raw_spin_lock_irqsave(&l->lock, flags); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); raw_spin_unlock_irqrestore(&l->lock, flags); } void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { if (lru->percpu) bpf_percpu_lru_push_free(lru, node); else bpf_common_lru_push_free(lru, node); } static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems) { struct bpf_lru_list *l = &lru->common_lru.lru_list; u32 i; for (i = 0; i < nr_elems; i++) { struct bpf_lru_node *node; node = (struct bpf_lru_node *)(buf + node_offset); node->type = BPF_LRU_LIST_T_FREE; bpf_lru_node_clear_ref(node); list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } } static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems) { u32 i, pcpu_entries; int cpu; struct bpf_lru_list *l; pcpu_entries = nr_elems / num_possible_cpus(); i = 0; for_each_possible_cpu(cpu) { struct bpf_lru_node *node; l = per_cpu_ptr(lru->percpu_lru, cpu); again: node = (struct bpf_lru_node *)(buf + node_offset); node->cpu = cpu; node->type = BPF_LRU_LIST_T_FREE; bpf_lru_node_clear_ref(node); list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); i++; buf += elem_size; if (i == nr_elems) break; if (i % pcpu_entries) goto again; } } void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems) { if (lru->percpu) bpf_percpu_lru_populate(lru, buf, node_offset, elem_size, nr_elems); else bpf_common_lru_populate(lru, buf, node_offset, elem_size, nr_elems); } static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu) { int i; for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++) INIT_LIST_HEAD(&loc_l->lists[i]); 
loc_l->next_steal = cpu; raw_spin_lock_init(&loc_l->lock); } static void bpf_lru_list_init(struct bpf_lru_list *l) { int i; for (i = 0; i < NR_BPF_LRU_LIST_T; i++) INIT_LIST_HEAD(&l->lists[i]); for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++) l->counts[i] = 0; l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; raw_spin_lock_init(&l->lock); } int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, del_from_htab_func del_from_htab, void *del_arg) { int cpu; if (percpu) { lru->percpu_lru = alloc_percpu(struct bpf_lru_list); if (!lru->percpu_lru) return -ENOMEM; for_each_possible_cpu(cpu) { struct bpf_lru_list *l; l = per_cpu_ptr(lru->percpu_lru, cpu); bpf_lru_list_init(l); } lru->nr_scans = PERCPU_NR_SCANS; } else { struct bpf_common_lru *clru = &lru->common_lru; clru->local_list = alloc_percpu(struct bpf_lru_locallist); if (!clru->local_list) return -ENOMEM; for_each_possible_cpu(cpu) { struct bpf_lru_locallist *loc_l; loc_l = per_cpu_ptr(clru->local_list, cpu); bpf_lru_locallist_init(loc_l, cpu); } bpf_lru_list_init(&clru->lru_list); lru->nr_scans = LOCAL_NR_SCANS; } lru->percpu = percpu; lru->del_from_htab = del_from_htab; lru->del_arg = del_arg; lru->hash_offset = hash_offset; return 0; } void bpf_lru_destroy(struct bpf_lru *lru) { if (lru->percpu) free_percpu(lru->percpu_lru); else free_percpu(lru->common_lru.local_list); }
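/*
 * Editor's note: the block below is an illustrative usage sketch, not part
 * of the original kernel/bpf/bpf_lru_list.c.  "struct demo_elem" and the
 * demo_* helpers are hypothetical; the real consumer of this API is the
 * LRU hash map in kernel/bpf/hashtab.c.  It assumes the declarations from
 * bpf_lru_list.h are in scope.
 */
#include <linux/container_of.h>
#include <linux/stddef.h>

struct demo_elem {
	struct bpf_lru_node lru_node;
	u32 hash;		/* written by the LRU code via hash_offset */
	/* key/value payload would follow here */
};

/* Eviction callback: must unlink @node's element from the map and return
 * true if the node may be recycled.  A map that cannot release the element
 * returns false and the LRU code keeps scanning.
 */
static bool demo_del_from_htab(void *arg, struct bpf_lru_node *node)
{
	return true;
}

static int demo_lru_setup(struct bpf_lru *lru, void *elems, u32 nr_elems)
{
	int err;

	/* hash_offset is relative to the embedded bpf_lru_node */
	err = bpf_lru_init(lru, false /* common (not per-CPU) LRU */,
			   offsetof(struct demo_elem, hash) -
			   offsetof(struct demo_elem, lru_node),
			   demo_del_from_htab, NULL);
	if (err)
		return err;

	/* hand every preallocated element to the global free list */
	bpf_lru_populate(lru, elems, offsetof(struct demo_elem, lru_node),
			 sizeof(struct demo_elem), nr_elems);
	return 0;
}

/* Allocation path: pop a free node (possibly evicting an old element);
 * the element is returned to the LRU with bpf_lru_push_free() on delete.
 */
static struct demo_elem *demo_alloc_elem(struct bpf_lru *lru, u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(lru, hash);

	return node ? container_of(node, struct demo_elem, lru_node) : NULL;
}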
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic show_mem() implementation
 *
 * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
 */

#include <linux/blkdev.h>
#include <linux/cma.h>
#include <linux/cpuset.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/swap.h>
#include <linux/vmstat.h>

#include "internal.h"
#include "swap.h"

atomic_long_t _totalram_pages __read_mostly;
EXPORT_SYMBOL(_totalram_pages);
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;

static inline void show_node(struct zone *zone)
{
	if (IS_ENABLED(CONFIG_NUMA))
		printk("Node %d ", zone_to_nid(zone));
}

long si_mem_available(void)
{
	long available;
	unsigned long pagecache;
	unsigned long wmark_low = 0;
	unsigned long reclaimable;
	struct zone *zone;

	for_each_zone(zone)
		wmark_low += low_wmark_pages(zone);

	/*
	 * Estimate the amount of memory available for userspace allocations,
	 * without causing swapping or OOM.
	 */
	available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;

	/*
	 * Not all the page cache can be freed, otherwise the system will
	 * start swapping or thrashing. Assume at least half of the page
	 * cache, or the low watermark worth of cache, needs to stay.
	 */
	pagecache = global_node_page_state(NR_ACTIVE_FILE) +
		global_node_page_state(NR_INACTIVE_FILE);
	pagecache -= min(pagecache / 2, wmark_low);
	available += pagecache;

	/*
	 * Part of the reclaimable slab and other kernel memory consists of
	 * items that are in use, and cannot be freed. Cap this estimate at the
	 * low watermark.
*/ reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); reclaimable -= min(reclaimable / 2, wmark_low); available += reclaimable; if (available < 0) available = 0; return available; } EXPORT_SYMBOL_GPL(si_mem_available); void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages(); val->sharedram = global_node_page_state(NR_SHMEM); val->freeram = global_zone_page_state(NR_FREE_PAGES); val->bufferram = nr_blockdev_pages(); val->totalhigh = totalhigh_pages(); val->freehigh = nr_free_highpages(); val->mem_unit = PAGE_SIZE; } EXPORT_SYMBOL(si_meminfo); #ifdef CONFIG_NUMA void si_meminfo_node(struct sysinfo *val, int nid) { int zone_type; /* needs to be signed */ unsigned long managed_pages = 0; unsigned long managed_highpages = 0; unsigned long free_highpages = 0; pg_data_t *pgdat = NODE_DATA(nid); for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]); val->totalram = managed_pages; val->sharedram = node_page_state(pgdat, NR_SHMEM); val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); #ifdef CONFIG_HIGHMEM for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; if (is_highmem(zone)) { managed_highpages += zone_managed_pages(zone); free_highpages += zone_page_state(zone, NR_FREE_PAGES); } } val->totalhigh = managed_highpages; val->freehigh = free_highpages; #else val->totalhigh = managed_highpages; val->freehigh = free_highpages; #endif val->mem_unit = PAGE_SIZE; } #endif /* * Determine whether the node should be displayed or not, depending on whether * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask) { if (!(flags & SHOW_MEM_FILTER_NODES)) return false; /* * no node mask - aka implicit memory numa policy. Do not bother with * the synchronization - read_mems_allowed_begin - because we do not * have to be precise here. */ if (!nodemask) nodemask = &cpuset_current_mems_allowed; return !node_isset(nid, *nodemask); } static void show_migration_types(unsigned char type) { static const char types[MIGRATE_TYPES] = { [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_MOVABLE] = 'M', [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_HIGHATOMIC] = 'H', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif #ifdef CONFIG_MEMORY_ISOLATION [MIGRATE_ISOLATE] = 'I', #endif }; char tmp[MIGRATE_TYPES + 1]; char *p = tmp; int i; for (i = 0; i < MIGRATE_TYPES; i++) { if (type & (1 << i)) *p++ = types[i]; } *p = '\0'; printk(KERN_CONT "(%s) ", tmp); } static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx) { int zone_idx; for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++) if (zone_managed_pages(pgdat->node_zones + zone_idx)) return true; return false; } /* * Show free area list (used inside shift_scroll-lock stuff) * We also calculate the percentage fragmentation. We do this by counting the * memory on each free list with the exception of the first item on the list. * * Bits in @filter: * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's * cpuset. 
*/ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long free_pcp = 0; int cpu, nid; struct zone *zone; pg_data_t *pgdat; for_each_populated_zone(zone) { if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; } printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu dirty:%lu writeback:%lu\n" " slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu\n" " sec_pagetables:%lu bounce:%lu\n" " kernel_misc_reclaimable:%lu\n" " free:%lu free_pcp:%lu free_cma:%lu\n", global_node_page_state(NR_ACTIVE_ANON), global_node_page_state(NR_INACTIVE_ANON), global_node_page_state(NR_ISOLATED_ANON), global_node_page_state(NR_ACTIVE_FILE), global_node_page_state(NR_INACTIVE_FILE), global_node_page_state(NR_ISOLATED_FILE), global_node_page_state(NR_UNEVICTABLE), global_node_page_state(NR_FILE_DIRTY), global_node_page_state(NR_WRITEBACK), global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B), global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), global_node_page_state(NR_PAGETABLE), global_node_page_state(NR_SECONDARY_PAGETABLE), global_zone_page_state(NR_BOUNCE), global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE), global_zone_page_state(NR_FREE_PAGES), free_pcp, global_zone_page_state(NR_FREE_CMA_PAGES)); for_each_online_pgdat(pgdat) { if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) continue; if (!node_has_managed_zones(pgdat, max_zone_idx)) continue; printk("Node %d" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " isolated(anon):%lukB" " isolated(file):%lukB" " mapped:%lukB" " dirty:%lukB" " writeback:%lukB" " shmem:%lukB" #ifdef CONFIG_TRANSPARENT_HUGEPAGE " shmem_thp:%lukB" " shmem_pmdmapped:%lukB" " anon_thp:%lukB" #endif " writeback_tmp:%lukB" " kernel_stack:%lukB" #ifdef CONFIG_SHADOW_CALL_STACK " shadow_call_stack:%lukB" #endif " pagetables:%lukB" " sec_pagetables:%lukB" " all_unreclaimable? %s" "\n", pgdat->node_id, K(node_page_state(pgdat, NR_ACTIVE_ANON)), K(node_page_state(pgdat, NR_INACTIVE_ANON)), K(node_page_state(pgdat, NR_ACTIVE_FILE)), K(node_page_state(pgdat, NR_INACTIVE_FILE)), K(node_page_state(pgdat, NR_UNEVICTABLE)), K(node_page_state(pgdat, NR_ISOLATED_ANON)), K(node_page_state(pgdat, NR_ISOLATED_FILE)), K(node_page_state(pgdat, NR_FILE_MAPPED)), K(node_page_state(pgdat, NR_FILE_DIRTY)), K(node_page_state(pgdat, NR_WRITEBACK)), K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE K(node_page_state(pgdat, NR_SHMEM_THPS)), K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), K(node_page_state(pgdat, NR_ANON_THPS)), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif K(node_page_state(pgdat, NR_PAGETABLE)), K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? 
"yes" : "no"); } for_each_populated_zone(zone) { int i; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; free_pcp = 0; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; show_node(zone); printk(KERN_CONT "%s" " free:%lukB" " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" " reserved_highatomic:%luKB" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " writepending:%lukB" " present:%lukB" " managed:%lukB" " mlocked:%lukB" " bounce:%lukB" " free_pcp:%lukB" " local_pcp:%ukB" " free_cma:%lukB" "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), K(zone->watermark_boost), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), K(zone->nr_reserved_highatomic), K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), K(this_cpu_read(zone->per_cpu_pageset->count)), K(zone_page_state(zone, NR_FREE_CMA_PAGES))); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); printk(KERN_CONT "\n"); } for_each_populated_zone(zone) { unsigned int order; unsigned long nr[NR_PAGE_ORDERS], flags, total = 0; unsigned char types[NR_PAGE_ORDERS]; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; show_node(zone); printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &zone->free_area[order]; int type; nr[order] = area->nr_free; total += nr[order] << order; types[order] = 0; for (type = 0; type < MIGRATE_TYPES; type++) { if (!free_area_empty(area, type)) types[order] |= 1 << type; } } spin_unlock_irqrestore(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { printk(KERN_CONT "%lu*%lukB ", nr[order], K(1UL) << order); if (nr[order]) show_migration_types(types[order]); } printk(KERN_CONT "= %lukB\n", K(total)); } for_each_online_node(nid) { if (show_mem_node_skip(filter, nid, nodemask)) continue; hugetlb_show_meminfo_node(nid); } printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES)); show_swap_cache_info(); } void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long total = 0, reserved = 0, highmem = 0; struct zone *zone; printk("Mem-Info:\n"); show_free_areas(filter, nodemask, max_zone_idx); for_each_populated_zone(zone) { total += zone->present_pages; reserved += zone->present_pages - zone_managed_pages(zone); if (is_highmem(zone)) highmem += zone->present_pages; } printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); printk("%lu pages reserved\n", reserved); #ifdef CONFIG_CMA printk("%lu pages cma reserved\n", totalcma_pages); #endif #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif }
// SPDX-License-Identifier: GPL-2.0-only
/*
 * test/set flag bits stored in conntrack extension area.
 *
 * (C) 2013 Astaro GmbH & Co KG
 */

#include <linux/export.h>
#include <linux/types.h>

#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>

static int replace_u32(u32 *address, u32 mask, u32 new)
{
	u32 old, tmp;

	do {
		old = *address;
		tmp = (old & mask) ^ new;
		if (old == tmp)
			return 0;
	} while (cmpxchg(address, old, tmp) != old);

	return 1;
}

int nf_connlabels_replace(struct nf_conn *ct,
			  const u32 *data,
			  const u32 *mask, unsigned int words32)
{
	struct nf_conn_labels *labels;
	unsigned int size, i;
	int changed = 0;
	u32 *dst;

	labels = nf_ct_labels_find(ct);
	if (!labels)
		return -ENOSPC;

	size = sizeof(labels->bits);
	if (size < (words32 * sizeof(u32)))
		words32 = size / sizeof(u32);

	dst = (u32 *) labels->bits;
	for (i = 0; i < words32; i++)
		changed |= replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]);

	size /= sizeof(u32);
	for (i = words32; i < size; i++) /* pad */
		replace_u32(&dst[i], 0, 0);

	if (changed)
		nf_conntrack_event_cache(IPCT_LABEL, ct);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_replace);

int nf_connlabels_get(struct net *net, unsigned int bits)
{
	int v;

	if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
		return -ERANGE;

	BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);

	v = atomic_inc_return_relaxed(&net->ct.labels_used);
	WARN_ON_ONCE(v <= 0);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_get);

void nf_connlabels_put(struct net *net)
{
	int v = atomic_dec_return_relaxed(&net->ct.labels_used);

	WARN_ON_ONCE(v < 0);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
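/*
 * Editor's note: usage sketch, not part of the original
 * net/netfilter/nf_conntrack_labels.c.  demo_set_label_bit() is
 * hypothetical; real callers include the xt_connlabel match and the OVS
 * conntrack code.  The caller is assumed to have set up the labels
 * extension already (nf_connlabels_get() plus template configuration).
 */
#include <linux/bits.h>
#include <linux/kernel.h>

static int demo_set_label_bit(struct nf_conn *ct, unsigned int bit)
{
	u32 data[NF_CT_LABELS_MAX_SIZE / sizeof(u32)] = {};
	u32 mask[NF_CT_LABELS_MAX_SIZE / sizeof(u32)] = {};

	/* only bits set in @mask are written; everything else is kept, so
	 * this sets one label bit without disturbing the others
	 */
	data[bit / 32] = BIT(bit % 32);
	mask[bit / 32] = BIT(bit % 32);

	return nf_connlabels_replace(ct, data, mask, ARRAY_SIZE(data));
}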
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AppArmor security module
 *
 * This file contains AppArmor auditing function definitions.
 *
 * Copyright (C) 1998-2008 Novell/SUSE
 * Copyright 2009-2010 Canonical Ltd.
 */

#ifndef __AA_AUDIT_H
#define __AA_AUDIT_H

#include <linux/audit.h>
#include <linux/fs.h>
#include <linux/lsm_audit.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include "file.h"
#include "label.h"

extern const char *const audit_mode_names[];
#define AUDIT_MAX_INDEX 5
enum audit_mode {
	AUDIT_NORMAL,		/* follow normal auditing of accesses */
	AUDIT_QUIET_DENIED,	/* quiet all denied access messages */
	AUDIT_QUIET,		/* quiet all messages */
	AUDIT_NOQUIET,		/* do not quiet audit messages */
	AUDIT_ALL		/* audit all accesses */
};

enum audit_type {
	AUDIT_APPARMOR_AUDIT,
	AUDIT_APPARMOR_ALLOWED,
	AUDIT_APPARMOR_DENIED,
	AUDIT_APPARMOR_HINT,
	AUDIT_APPARMOR_STATUS,
	AUDIT_APPARMOR_ERROR,
	AUDIT_APPARMOR_KILL,
	AUDIT_APPARMOR_AUTO
};

#define OP_NULL NULL

#define OP_SYSCTL "sysctl"
#define OP_CAPABLE "capable"

#define OP_UNLINK "unlink"
#define OP_MKDIR "mkdir"
#define OP_RMDIR "rmdir"
#define OP_MKNOD "mknod"
#define OP_TRUNC "truncate"
#define OP_LINK "link"
#define OP_SYMLINK "symlink"
#define OP_RENAME_SRC "rename_src"
#define OP_RENAME_DEST "rename_dest"
#define OP_CHMOD "chmod"
#define OP_CHOWN "chown"
#define OP_GETATTR "getattr"
#define OP_OPEN "open"

#define OP_FRECEIVE "file_receive"
#define OP_FPERM "file_perm"
#define OP_FLOCK "file_lock"
#define OP_FMMAP "file_mmap"
#define OP_FMPROT "file_mprotect"
#define OP_INHERIT "file_inherit"

#define OP_PIVOTROOT "pivotroot"
#define OP_MOUNT "mount"
#define OP_UMOUNT "umount"

#define OP_CREATE "create"
#define OP_POST_CREATE "post_create"
#define OP_BIND "bind"
#define OP_CONNECT "connect"
#define OP_LISTEN "listen"
#define OP_ACCEPT "accept"
#define OP_SENDMSG "sendmsg"
#define OP_RECVMSG "recvmsg"
#define OP_GETSOCKNAME "getsockname"
#define OP_GETPEERNAME "getpeername"
#define OP_GETSOCKOPT "getsockopt"
#define OP_SETSOCKOPT "setsockopt"
#define OP_SHUTDOWN "socket_shutdown"

#define OP_PTRACE "ptrace"
#define OP_SIGNAL "signal"

#define OP_EXEC "exec"

#define OP_CHANGE_HAT "change_hat"
#define OP_CHANGE_PROFILE "change_profile"
#define OP_CHANGE_ONEXEC "change_onexec"
#define OP_STACK "stack"
#define OP_STACK_ONEXEC "stack_onexec"

#define OP_SETPROCATTR "setprocattr"
#define OP_SETRLIMIT "setrlimit"

#define OP_PROF_REPL "profile_replace"
#define OP_PROF_LOAD "profile_load"
#define OP_PROF_RM "profile_remove"

#define OP_USERNS_CREATE "userns_create"

#define OP_URING_OVERRIDE "uring_override"
#define OP_URING_SQPOLL "uring_sqpoll"

struct apparmor_audit_data {
	int error;
	int type;
	u16 class;
	const char *op;
	const struct cred *subj_cred;
	struct aa_label *subj_label;
	const char *name;
	const char *info;
	u32 request;
	u32 denied;
	union {
/* these entries require a custom callback fn */ struct { struct aa_label *peer; union { struct { const char *target; kuid_t ouid; } fs; struct { int rlim; unsigned long max; } rlim; struct { int signal; int unmappedsig; }; struct { int type, protocol; struct sock *peer_sk; void *addr; int addrlen; } net; }; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; struct { const char *src_name; const char *type; const char *trans; const char *data; unsigned long flags; } mnt; struct { struct aa_label *target; } uring; }; struct common_audit_data common; }; /* macros for dealing with apparmor_audit_data structure */ #define aad(SA) (container_of(SA, struct apparmor_audit_data, common)) #define aad_of_va(VA) aad((struct common_audit_data *)(VA)) #define DEFINE_AUDIT_DATA(NAME, T, C, X) \ /* TODO: cleanup audit init so we don't need _aad = {0,} */ \ struct apparmor_audit_data NAME = { \ .class = (C), \ .op = (X), \ .common.type = (T), \ .common.u.tsk = NULL, \ .common.apparmor_audit_data = &NAME, \ }; void aa_audit_msg(int type, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)); int aa_audit(int type, struct aa_profile *profile, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)); #define aa_audit_error(ERROR, AD, CB) \ ({ \ (AD)->error = (ERROR); \ aa_audit_msg(AUDIT_APPARMOR_ERROR, (AD), (CB)); \ (AD)->error; \ }) static inline int complain_error(int error) { if (error == -EPERM || error == -EACCES) return 0; return error; } void aa_audit_rule_free(void *vrule); int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule); int aa_audit_rule_known(struct audit_krule *rule); int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); #endif /* __AA_AUDIT_H */
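/*
 * Editor's note: illustrative sketch, not part of the original header.
 * demo_audit_cb() and demo_audit_denied() are hypothetical, modelled on the
 * callers in security/apparmor/*.c; AA_CLASS_FILE (from apparmor.h) is used
 * here only as a stand-in class.
 */
static void demo_audit_cb(struct audit_buffer *ab, void *va)
{
	struct common_audit_data *sa = va;
	struct apparmor_audit_data *ad = aad(sa);

	audit_log_format(ab, " requested=0x%x denied=0x%x",
			 ad->request, ad->denied);
	if (ad->name)
		audit_log_format(ab, " name=\"%s\"", ad->name);
}

static int demo_audit_denied(struct aa_profile *profile, const char *name,
			     u32 request, u32 denied, int error)
{
	/* DEFINE_AUDIT_DATA() builds the on-stack apparmor_audit_data and
	 * links it to its embedded common_audit_data
	 */
	DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_FILE, OP_OPEN);

	ad.name = name;
	ad.request = request;
	ad.denied = denied;
	ad.error = error;

	return aa_audit(AUDIT_APPARMOR_AUTO, profile, &ad, demo_audit_cb);
}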
// SPDX-License-Identifier: GPL-2.0
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/wext.h>
#include <net/hotdata.h>

#include "dev.h"

static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos)
{
	unsigned long ifindex = *pos;
	struct net_device *dev;

	for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
		*pos = dev->ifindex;
		return dev;
	}
	return NULL;
}

static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	if (!*pos)
		return SEQ_START_TOKEN;

	return dev_seq_from_index(seq, pos);
}

static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return dev_seq_from_index(seq, pos);
}

static void dev_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	struct rtnl_link_stats64 temp;
	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		    stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		    stats->tx_aborted_errors +
		    stats->tx_window_errors +
		    stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}

/*
 *	Called from the PROCfs module.
This now uses the new arbitrary sized * /proc/net interface to create /proc/net/dev */ static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Inter-| Receive " " | Transmit\n" " face |bytes packets errs drop fifo frame " "compressed multicast|bytes packets errs " "drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; } static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->input_pkt_queue); } static u32 softnet_process_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) { struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) { return softnet_get_online(pos); } static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return softnet_get_online(pos); } static void softnet_seq_stop(struct seq_file *seq, void *v) { } static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; u32 input_qlen = softnet_input_pkt_queue_len(sd); u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit *fl; rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) flow_limit_count = fl->count; rcu_read_unlock(); #endif /* the index is the CPU id owing this sd. Since offline CPUs are not * displayed, it would be othrwise not trivial for the user-space * mapping the data a specific CPU */ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ sd->received_rps, flow_limit_count, input_qlen + process_qlen, (int)seq->index, input_qlen, process_qlen); return 0; } static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_seq_show, }; static const struct seq_operations softnet_seq_ops = { .start = softnet_seq_start, .next = softnet_seq_next, .stop = softnet_seq_stop, .show = softnet_seq_show, }; static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; struct net_device *dev; loff_t i = 0; int t; for_each_netdev_rcu(seq_file_net(seq), dev) { ptype_list = &dev->ptype_all; list_for_each_entry_rcu(pt, ptype_list, list) { if (i == pos) return pt; ++i; } } list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { if (i == pos) return pt; ++i; } for (t = 0; t < PTYPE_HASH_SIZE; t++) { list_for_each_entry_rcu(pt, &ptype_base[t], list) { if (i == pos) return pt; ++i; } } return NULL; } static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return *pos ? 
ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; if (pt->dev) { if (nxt != &pt->dev->ptype_all) goto found; dev = pt->dev; for_each_netdev_continue_rcu(seq_file_net(seq), dev) { if (!list_empty(&dev->ptype_all)) { nxt = dev->ptype_all.next; goto found; } } nxt = net_hotdata.ptype_all.next; goto ptype_all; } if (pt->type == htons(ETH_P_ALL)) { ptype_all: if (nxt != &net_hotdata.ptype_all) goto found; hash = 0; nxt = ptype_base[0].next; } else hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { if (++hash >= PTYPE_HASH_SIZE) return NULL; nxt = ptype_base[hash].next; } found: return list_entry(nxt, struct packet_type, list); } static void ptype_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); seq_printf(seq, " %-8s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } return 0; } static const struct seq_operations ptype_seq_ops = { .start = ptype_seq_start, .next = ptype_seq_next, .stop = ptype_seq_stop, .show = ptype_seq_show, }; static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, sizeof(struct seq_net_private))) goto out; if (!proc_create_seq("softnet_stat", 0444, net->proc_net, &softnet_seq_ops)) goto out_dev; if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, sizeof(struct seq_net_private))) goto out_softnet; if (wext_proc_init(net)) goto out_ptype; rc = 0; out: return rc; out_ptype: remove_proc_entry("ptype", net->proc_net); out_softnet: remove_proc_entry("softnet_stat", net->proc_net); out_dev: remove_proc_entry("dev", net->proc_net); goto out; } static void __net_exit dev_proc_net_exit(struct net *net) { wext_proc_exit(net); remove_proc_entry("ptype", net->proc_net); remove_proc_entry("softnet_stat", net->proc_net); remove_proc_entry("dev", net->proc_net); } static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; struct net_device *dev = v; if (v == SEQ_START_TOKEN) return 0; netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; } static const struct seq_operations dev_mc_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_mc_seq_show, }; static int __net_init dev_mc_net_init(struct net *net) { if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } static void __net_exit dev_mc_net_exit(struct net *net) { remove_proc_entry("dev_mcast", net->proc_net); } static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = 
dev_mc_net_exit, }; int __init dev_proc_init(void) { int ret = register_pernet_subsys(&dev_proc_ops); if (!ret) return register_pernet_subsys(&dev_mc_net_ops); return ret; }
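/*
 * Editor's note: user-space sketch, not part of the original
 * net/core/net-procfs.c.  It parses /proc/net/softnet_stat exactly as
 * emitted by softnet_seq_show() above: 15 hex fields per online CPU,
 * where field 13 is the CPU id and fields 4-9 are historical zeros.
 * Build it as an ordinary user-space program.
 */
#include <stdio.h>

int main(void)
{
	unsigned int v[15];
	FILE *f = fopen("/proc/net/softnet_stat", "r");

	if (!f)
		return 1;

	while (fscanf(f, "%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
		      &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7],
		      &v[8], &v[9], &v[10], &v[11], &v[12], &v[13],
		      &v[14]) == 15) {
		/* v[0]=processed, v[1]=dropped, v[2]=time_squeeze,
		 * v[9]=received_rps, v[10]=flow_limit_count,
		 * v[11]=backlog length, v[12]=cpu id
		 */
		printf("cpu %u: processed=%u dropped=%u time_squeeze=%u backlog=%u\n",
		       v[12], v[0], v[1], v[2], v[11]);
	}
	fclose(f);
	return 0;
}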
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Power Management Quality of Service (PM QoS) support base.
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * Authors:
 *	Mark Gross <mgross@linux.intel.com>
 *	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * Provided here is an interface for specifying PM QoS dependencies. It allows
 * entities depending on QoS constraints to register their requests which are
 * aggregated as appropriate to produce effective constraints (target values)
 * that can be monitored by entities needing to respect them, either by polling
 * or through a built-in notification mechanism.
 *
 * In addition to the basic functionality, more specific interfaces for managing
 * global CPU latency QoS requests and frequency QoS requests are provided.
*/ /*#define DEBUG*/ #include <linux/pm_qos.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/string.h> #include <linux/platform_device.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/export.h> #include <trace/events/power.h> /* * locking rule: all changes to constraints or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ static DEFINE_SPINLOCK(pm_qos_lock); /** * pm_qos_read_value - Return the current effective constraint value. * @c: List of PM QoS constraint requests. */ s32 pm_qos_read_value(struct pm_qos_constraints *c) { return READ_ONCE(c->target_value); } static int pm_qos_get_value(struct pm_qos_constraints *c) { if (plist_head_empty(&c->list)) return c->no_constraint_value; switch (c->type) { case PM_QOS_MIN: return plist_first(&c->list)->prio; case PM_QOS_MAX: return plist_last(&c->list)->prio; default: WARN(1, "Unknown PM QoS type in %s\n", __func__); return PM_QOS_DEFAULT_VALUE; } } static void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { WRITE_ONCE(c->target_value, value); } /** * pm_qos_update_target - Update a list of PM QoS constraint requests. * @c: List of PM QoS requests. * @node: Target list entry. * @action: Action to carry out (add, update or remove). * @value: New request value for the target list entry. * * Update the given list of PM QoS constraint requests, @c, by carrying an * @action involving the @node list entry and @value on it. * * The recognized values of @action are PM_QOS_ADD_REQ (store @value in @node * and add it to the list), PM_QOS_UPDATE_REQ (remove @node from the list, store * @value in it and add it to the list again), and PM_QOS_REMOVE_REQ (remove * @node from the list, ignore @value). * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value) { int prev_value, curr_value, new_value; unsigned long flags; spin_lock_irqsave(&pm_qos_lock, flags); prev_value = pm_qos_get_value(c); if (value == PM_QOS_DEFAULT_VALUE) new_value = c->default_value; else new_value = value; switch (action) { case PM_QOS_REMOVE_REQ: plist_del(node, &c->list); break; case PM_QOS_UPDATE_REQ: /* * To change the list, atomically remove, reinit with new value * and add, then see if the aggregate has changed. */ plist_del(node, &c->list); fallthrough; case PM_QOS_ADD_REQ: plist_node_init(node, new_value); plist_add(node, &c->list); break; default: /* no action */ ; } curr_value = pm_qos_get_value(c); pm_qos_set_value(c, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); trace_pm_qos_update_target(action, prev_value, curr_value); if (prev_value == curr_value) return 0; if (c->notifiers) blocking_notifier_call_chain(c->notifiers, curr_value, NULL); return 1; } /** * pm_qos_flags_remove_req - Remove device PM QoS flags request. * @pqf: Device PM QoS flags set to remove the request from. * @req: Request to remove from the set. 
*/ static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req) { s32 val = 0; list_del(&req->node); list_for_each_entry(req, &pqf->list, node) val |= req->flags; pqf->effective_flags = val; } /** * pm_qos_update_flags - Update a set of PM QoS flags. * @pqf: Set of PM QoS flags to update. * @req: Request to add to the set, to modify, or to remove from the set. * @action: Action to take on the set. * @val: Value of the request to add or modify. * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val) { unsigned long irqflags; s32 prev_value, curr_value; spin_lock_irqsave(&pm_qos_lock, irqflags); prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; switch (action) { case PM_QOS_REMOVE_REQ: pm_qos_flags_remove_req(pqf, req); break; case PM_QOS_UPDATE_REQ: pm_qos_flags_remove_req(pqf, req); fallthrough; case PM_QOS_ADD_REQ: req->flags = val; INIT_LIST_HEAD(&req->node); list_add_tail(&req->node, &pqf->list); pqf->effective_flags |= val; break; default: /* no action */ ; } curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; spin_unlock_irqrestore(&pm_qos_lock, irqflags); trace_pm_qos_update_flags(action, prev_value, curr_value); return prev_value != curr_value; } #ifdef CONFIG_CPU_IDLE /* Definitions related to the CPU latency QoS. */ static struct pm_qos_constraints cpu_latency_constraints = { .list = PLIST_HEAD_INIT(cpu_latency_constraints.list), .target_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .default_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .no_constraint_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .type = PM_QOS_MIN, }; static inline bool cpu_latency_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit. */ s32 cpu_latency_qos_limit(void) { return pm_qos_read_value(&cpu_latency_constraints); } /** * cpu_latency_qos_request_active - Check the given PM QoS request. * @req: PM QoS request to check. * * Return: 'true' if @req has been added to the CPU latency QoS list, 'false' * otherwise. */ bool cpu_latency_qos_request_active(struct pm_qos_request *req) { return req->qos == &cpu_latency_constraints; } EXPORT_SYMBOL_GPL(cpu_latency_qos_request_active); static void cpu_latency_qos_apply(struct pm_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret = pm_qos_update_target(req->qos, &req->node, action, value); if (ret > 0) wake_up_all_idle_cpus(); } /** * cpu_latency_qos_add_request - Add new CPU latency QoS request. * @req: Pointer to a preallocated handle. * @value: Requested constraint value. * * Use @value to initialize the request handle pointed to by @req, insert it as * a new entry to the CPU latency QoS list and recompute the effective QoS * constraint for that list. * * Callers need to save the handle for later use in updates and removal of the * QoS request represented by it. 
*/ void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) { if (!req || cpu_latency_qos_value_invalid(value)) return; if (cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for already added request\n", __func__); return; } trace_pm_qos_add_request(value); req->qos = &cpu_latency_constraints; cpu_latency_qos_apply(req, PM_QOS_ADD_REQ, value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request); /** * cpu_latency_qos_update_request - Modify existing CPU latency QoS request. * @req : QoS request to update. * @new_value: New requested constraint value. * * Use @new_value to update the QoS request represented by @req in the CPU * latency QoS list along with updating the effective constraint value for that * list. */ void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) { if (!req || cpu_latency_qos_value_invalid(new_value)) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_update_request(new_value); if (new_value == req->node.prio) return; cpu_latency_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_update_request); /** * cpu_latency_qos_remove_request - Remove existing CPU latency QoS request. * @req: QoS request to remove. * * Remove the CPU latency QoS request represented by @req from the CPU latency * QoS list along with updating the effective constraint value for that list. */ void cpu_latency_qos_remove_request(struct pm_qos_request *req) { if (!req) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_remove_request(PM_QOS_DEFAULT_VALUE); cpu_latency_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(cpu_latency_qos_remove_request); /* User space interface to the CPU latency QoS via misc device. 
*/ static int cpu_latency_qos_open(struct inode *inode, struct file *filp) { struct pm_qos_request *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; cpu_latency_qos_add_request(req, PM_QOS_DEFAULT_VALUE); filp->private_data = req; return 0; } static int cpu_latency_qos_release(struct inode *inode, struct file *filp) { struct pm_qos_request *req = filp->private_data; filp->private_data = NULL; cpu_latency_qos_remove_request(req); kfree(req); return 0; } static ssize_t cpu_latency_qos_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct pm_qos_request *req = filp->private_data; unsigned long flags; s32 value; if (!req || !cpu_latency_qos_request_active(req)) return -EINVAL; spin_lock_irqsave(&pm_qos_lock, flags); value = pm_qos_get_value(&cpu_latency_constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); } static ssize_t cpu_latency_qos_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { s32 value; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) return -EFAULT; } else { int ret; ret = kstrtos32_from_user(buf, count, 16, &value); if (ret) return ret; } cpu_latency_qos_update_request(filp->private_data, value); return count; } static const struct file_operations cpu_latency_qos_fops = { .write = cpu_latency_qos_write, .read = cpu_latency_qos_read, .open = cpu_latency_qos_open, .release = cpu_latency_qos_release, .llseek = noop_llseek, }; static struct miscdevice cpu_latency_qos_miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "cpu_dma_latency", .fops = &cpu_latency_qos_fops, }; static int __init cpu_latency_qos_init(void) { int ret; ret = misc_register(&cpu_latency_qos_miscdev); if (ret < 0) pr_err("%s: %s setup failed\n", __func__, cpu_latency_qos_miscdev.name); return ret; } late_initcall(cpu_latency_qos_init); #endif /* CONFIG_CPU_IDLE */ /* Definitions related to the frequency QoS below. */ static inline bool freq_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * freq_constraints_init - Initialize frequency QoS constraints. * @qos: Frequency QoS constraints to initialize. */ void freq_constraints_init(struct freq_constraints *qos) { struct pm_qos_constraints *c; c = &qos->min_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->type = PM_QOS_MAX; c->notifiers = &qos->min_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); c = &qos->max_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->type = PM_QOS_MIN; c->notifiers = &qos->max_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); } /** * freq_qos_read_value - Get frequency QoS constraint for a given list. * @qos: Constraints to evaluate. * @type: QoS request type. */ s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type) { s32 ret; switch (type) { case FREQ_QOS_MIN: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MIN_DEFAULT_VALUE : pm_qos_read_value(&qos->min_freq); break; case FREQ_QOS_MAX: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MAX_DEFAULT_VALUE : pm_qos_read_value(&qos->max_freq); break; default: WARN_ON(1); ret = 0; } return ret; } /** * freq_qos_apply - Add/modify/remove frequency QoS request. * @req: Constraint request to apply. 
* @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. * * This is only meant to be called from inside pm_qos, not drivers. */ int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret; switch(req->type) { case FREQ_QOS_MIN: ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode, action, value); break; case FREQ_QOS_MAX: ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode, action, value); break; default: ret = -EINVAL; } return ret; } /** * freq_qos_add_request - Insert new frequency QoS request into a given list. * @qos: Constraints to update. * @req: Preallocated request object. * @type: Request type. * @value: Request value. * * Insert a new entry into the @qos list of requests, recompute the effective * QoS constraint value for that list and initialize the @req object. The * caller needs to save that object for later use in updates and removal. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value) { int ret; if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value)) return -EINVAL; if (WARN(freq_qos_request_active(req), "%s() called for active request\n", __func__)) return -EINVAL; req->qos = qos; req->type = type; ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value); if (ret < 0) { req->qos = NULL; req->type = 0; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_request); /** * freq_qos_update_request - Modify existing frequency QoS request. * @req: Request to modify. * @new_value: New request value. * * Update an existing frequency QoS request along with the effective constraint * value for the list of requests it belongs to. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) { if (!req || freq_qos_value_invalid(new_value)) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (req->pnode.prio == new_value) return 0; return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(freq_qos_update_request); /** * freq_qos_remove_request - Remove frequency QoS request from its list. * @req: Request to remove. * * Remove the given frequency QoS request from the list of constraints it * belongs to and recompute the effective constraint value for that list. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_remove_request(struct freq_qos_request *req) { int ret; if (!req) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); req->qos = NULL; req->type = 0; return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_request); /** * freq_qos_add_notifier - Add frequency QoS change notifier. * @qos: List of requests to add the notifier to. * @type: Request type. * @notifier: Notifier block to add. 
*/ int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_register(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_register(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_notifier); /** * freq_qos_remove_notifier - Remove frequency QoS change notifier. * @qos: List of requests to remove the notifier from. * @type: Request type. * @notifier: Notifier block to remove. */ int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_notifier);
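The frequency QoS helpers above (freq_constraints_init(), freq_qos_add_request(), freq_qos_update_request(), freq_qos_remove_request(), freq_qos_add_notifier()) are easiest to follow end to end from a caller's point of view. The following is a minimal, hedged sketch of such a caller, not code from this file; the header choice, the notifier callback, the frequency values and every "example_*" name are assumptions for illustration only.

#include <linux/pm_qos.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static struct freq_constraints example_constraints;	/* hypothetical consumer-owned constraints */
static struct freq_qos_request example_req;		/* hypothetical request object */

/* Assumed: called with the new effective FREQ_QOS_MIN value whenever it changes. */
static int example_min_freq_notify(struct notifier_block *nb,
				   unsigned long new_min, void *data)
{
	pr_info("effective minimum frequency is now %lu\n", new_min);
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_min_freq_notify,
};

static int example_freq_qos_setup(void)
{
	int ret;

	freq_constraints_init(&example_constraints);

	ret = freq_qos_add_notifier(&example_constraints, FREQ_QOS_MIN,
				    &example_nb);
	if (ret)
		return ret;

	/* Add a minimum-frequency request; a return of 1 means the constraint changed. */
	ret = freq_qos_add_request(&example_constraints, &example_req,
				   FREQ_QOS_MIN, 800000);
	if (ret < 0)
		return ret;

	/* Raise the request later, and drop it once it is no longer needed. */
	freq_qos_update_request(&example_req, 1000000);
	freq_qos_remove_request(&example_req);

	return 0;
}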
74 46 46 46 6 6 6 6 6 6 9 9 6 3 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pagemap.h> #include "squashfs_fs_sb.h" #include "decompressor.h" #include "page_actor.h" /* * This file contains implementations of page_actor for decompressing into * an intermediate buffer, and for decompressing directly into the * page cache. * * Calling code should avoid sleeping between calls to squashfs_first_page() * and squashfs_finish_page(). */ /* Implementation of page_actor for decompressing into intermediate buffer */ static void *cache_first_page(struct squashfs_page_actor *actor) { actor->next_page = 1; return actor->buffer[0]; } static void *cache_next_page(struct squashfs_page_actor *actor) { if (actor->next_page == actor->pages) return NULL; return actor->buffer[actor->next_page++]; } static void cache_finish_page(struct squashfs_page_actor *actor) { /* empty */ } struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, int pages, int length) { struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); if (actor == NULL) return NULL; actor->length = length ? : pages * PAGE_SIZE; actor->buffer = buffer; actor->pages = pages; actor->next_page = 0; actor->tmp_buffer = NULL; actor->squashfs_first_page = cache_first_page; actor->squashfs_next_page = cache_next_page; actor->squashfs_finish_page = cache_finish_page; return actor; } /* Implementation of page_actor for decompressing directly into page cache. */ static void *handle_next_page(struct squashfs_page_actor *actor) { int max_pages = (actor->length + PAGE_SIZE - 1) >> PAGE_SHIFT; if (actor->returned_pages == max_pages) return NULL; if ((actor->next_page == actor->pages) || (actor->next_index != actor->page[actor->next_page]->index)) { actor->next_index++; actor->returned_pages++; actor->last_page = NULL; return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM); } actor->next_index++; actor->returned_pages++; actor->last_page = actor->page[actor->next_page]; return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]); } static void *direct_first_page(struct squashfs_page_actor *actor) { return handle_next_page(actor); } static void *direct_next_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) { kunmap_local(actor->pageaddr); actor->pageaddr = NULL; } return handle_next_page(actor); } static void direct_finish_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) kunmap_local(actor->pageaddr); } struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk, struct page **page, int pages, int length) { struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); if (actor == NULL) return NULL; if (msblk->decompressor->alloc_buffer) { actor->tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (actor->tmp_buffer == NULL) { kfree(actor); return NULL; } } else actor->tmp_buffer = NULL; actor->length = length ? 
: pages * PAGE_SIZE; actor->page = page; actor->pages = pages; actor->next_page = 0; actor->returned_pages = 0; actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; actor->last_page = NULL; actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; actor->squashfs_finish_page = direct_finish_page; return actor; }
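To make the page-actor contract above concrete, here is a hedged sketch of a hypothetical decompressor back-end filling the actor's pages; it is not code from this file. It drives the actor through its function-pointer members directly (page_actor.h also provides wrapper helpers for first/next/finish page), and the example_copy_to_actor() name, the "src"/"src_len" parameters and the include list are assumptions.

#include <linux/err.h>
#include <linux/minmax.h>
#include <linux/string.h>
#include <linux/pagemap.h>

#include "page_actor.h"

static int example_copy_to_actor(struct squashfs_page_actor *actor,
				 const void *src, int src_len)
{
	int copied = 0;
	void *dst = actor->squashfs_first_page(actor);

	/* Per the comment above: no sleeping between first_page and finish_page. */
	while (dst && copied < src_len) {
		int bytes = min_t(int, src_len - copied, (int)PAGE_SIZE);

		/* A missing page shows up as an ERR_PTR; skip it but keep counting. */
		if (!IS_ERR(dst))
			memcpy(dst, src + copied, bytes);

		copied += bytes;
		dst = actor->squashfs_next_page(actor);
	}
	actor->squashfs_finish_page(actor);

	return copied;
}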
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
*/ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_health.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_ag.h" static struct kmem_cache *xfs_allocbt_cur_cache; STATIC struct xfs_btree_cur * xfs_bnobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp, cur->bc_ag.pag); } STATIC struct xfs_btree_cur * xfs_cntbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp, cur->bc_ag.pag); } STATIC void xfs_allocbt_set_root( struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, int inc) { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; ASSERT(ptr->s != 0); if (xfs_btree_is_bno(cur->bc_ops)) { agf->agf_bno_root = ptr->s; be32_add_cpu(&agf->agf_bno_level, inc); cur->bc_ag.pag->pagf_bno_level += inc; } else { agf->agf_cnt_root = ptr->s; be32_add_cpu(&agf->agf_cnt_level, inc); cur->bc_ag.pag->pagf_cnt_level += inc; } xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } STATIC int xfs_allocbt_alloc_block( struct xfs_btree_cur *cur, const union xfs_btree_ptr *start, union xfs_btree_ptr *new, int *stat) { int error; xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; if (bno == NULLAGBLOCK) { *stat = 0; return 0; } atomic64_inc(&cur->bc_mp->m_allocbt_blks); xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false); new->s = cpu_to_be32(bno); *stat = 1; return 0; } STATIC int xfs_allocbt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { struct xfs_buf *agbp = cur->bc_ag.agbp; xfs_agblock_t bno; int error; bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL, bno, 1); if (error) return error; atomic64_dec(&cur->bc_mp->m_allocbt_blks); xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); return 0; } /* * Update the longest extent in the AGF */ STATIC void xfs_allocbt_update_lastrec( struct xfs_btree_cur *cur, const struct xfs_btree_block *block, const union xfs_btree_rec *rec, int ptr, int reason) { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; struct xfs_perag *pag; __be32 len; int numrecs; ASSERT(!xfs_btree_is_bno(cur->bc_ops)); switch (reason) { case LASTREC_UPDATE: /* * If this is the last leaf block and it's the last record, * then update the size of the longest extent in the AG. 
*/ if (ptr != xfs_btree_get_numrecs(block)) return; len = rec->alloc.ar_blockcount; break; case LASTREC_INSREC: if (be32_to_cpu(rec->alloc.ar_blockcount) <= be32_to_cpu(agf->agf_longest)) return; len = rec->alloc.ar_blockcount; break; case LASTREC_DELREC: numrecs = xfs_btree_get_numrecs(block); if (ptr <= numrecs) return; ASSERT(ptr == numrecs + 1); if (numrecs) { xfs_alloc_rec_t *rrp; rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); len = rrp->ar_blockcount; } else { len = 0; } break; default: ASSERT(0); return; } agf->agf_longest = len; pag = cur->bc_ag.agbp->b_pag; pag->pagf_longest = be32_to_cpu(len); xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST); } STATIC int xfs_allocbt_get_minrecs( struct xfs_btree_cur *cur, int level) { return cur->bc_mp->m_alloc_mnr[level != 0]; } STATIC int xfs_allocbt_get_maxrecs( struct xfs_btree_cur *cur, int level) { return cur->bc_mp->m_alloc_mxr[level != 0]; } STATIC void xfs_allocbt_init_key_from_rec( union xfs_btree_key *key, const union xfs_btree_rec *rec) { key->alloc.ar_startblock = rec->alloc.ar_startblock; key->alloc.ar_blockcount = rec->alloc.ar_blockcount; } STATIC void xfs_bnobt_init_high_key_from_rec( union xfs_btree_key *key, const union xfs_btree_rec *rec) { __u32 x; x = be32_to_cpu(rec->alloc.ar_startblock); x += be32_to_cpu(rec->alloc.ar_blockcount) - 1; key->alloc.ar_startblock = cpu_to_be32(x); key->alloc.ar_blockcount = 0; } STATIC void xfs_cntbt_init_high_key_from_rec( union xfs_btree_key *key, const union xfs_btree_rec *rec) { key->alloc.ar_blockcount = rec->alloc.ar_blockcount; key->alloc.ar_startblock = 0; } STATIC void xfs_allocbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); } STATIC void xfs_allocbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); if (xfs_btree_is_bno(cur->bc_ops)) ptr->s = agf->agf_bno_root; else ptr->s = agf->agf_cnt_root; } STATIC int64_t xfs_bnobt_key_diff( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a; const struct xfs_alloc_rec *kp = &key->alloc; return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } STATIC int64_t xfs_cntbt_key_diff( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a; const struct xfs_alloc_rec *kp = &key->alloc; int64_t diff; diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; if (diff) return diff; return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } STATIC int64_t xfs_bnobt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, const union xfs_btree_key *mask) { ASSERT(!mask || mask->alloc.ar_startblock); return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) - be32_to_cpu(k2->alloc.ar_startblock); } STATIC int64_t xfs_cntbt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, const union xfs_btree_key *mask) { int64_t diff; ASSERT(!mask || (mask->alloc.ar_blockcount && mask->alloc.ar_startblock)); diff = be32_to_cpu(k1->alloc.ar_blockcount) - be32_to_cpu(k2->alloc.ar_blockcount); if (diff) return diff; return be32_to_cpu(k1->alloc.ar_startblock) - be32_to_cpu(k2->alloc.ar_startblock); } static 
xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; unsigned int level; if (!xfs_verify_magic(bp, block->bb_magic)) return __this_address; if (xfs_has_crc(mp)) { fa = xfs_btree_agblock_v5hdr_verify(bp); if (fa) return fa; } /* * The perag may not be attached during grow operations or fully * initialized from the AGF during log recovery. Therefore we can only * check against maximum tree depth from those contexts. * * Otherwise check against the per-tree limit. Peek at one of the * verifier magic values to determine the type of tree we're verifying * against. */ level = be16_to_cpu(block->bb_level); if (pag && xfs_perag_initialised_agf(pag)) { unsigned int maxlevel, repair_maxlevel = 0; /* * Online repair could be rewriting the free space btrees, so * we'll validate against the larger of either tree while this * is going on. */ if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) { maxlevel = pag->pagf_cnt_level; #ifdef CONFIG_XFS_ONLINE_REPAIR repair_maxlevel = pag->pagf_repair_cnt_level; #endif } else { maxlevel = pag->pagf_bno_level; #ifdef CONFIG_XFS_ONLINE_REPAIR repair_maxlevel = pag->pagf_repair_bno_level; #endif } if (level >= max(maxlevel, repair_maxlevel)) return __this_address; } else if (level >= mp->m_alloc_maxlevels) return __this_address; return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]); } static void xfs_allocbt_read_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; if (!xfs_btree_agblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_allocbt_verify(bp); if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); } static void xfs_allocbt_write_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; fa = xfs_allocbt_verify(bp); if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_agblock_calc_crc(bp); } const struct xfs_buf_ops xfs_bnobt_buf_ops = { .name = "xfs_bnobt", .magic = { cpu_to_be32(XFS_ABTB_MAGIC), cpu_to_be32(XFS_ABTB_CRC_MAGIC) }, .verify_read = xfs_allocbt_read_verify, .verify_write = xfs_allocbt_write_verify, .verify_struct = xfs_allocbt_verify, }; const struct xfs_buf_ops xfs_cntbt_buf_ops = { .name = "xfs_cntbt", .magic = { cpu_to_be32(XFS_ABTC_MAGIC), cpu_to_be32(XFS_ABTC_CRC_MAGIC) }, .verify_read = xfs_allocbt_read_verify, .verify_write = xfs_allocbt_write_verify, .verify_struct = xfs_allocbt_verify, }; STATIC int xfs_bnobt_keys_inorder( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2) { return be32_to_cpu(k1->alloc.ar_startblock) < be32_to_cpu(k2->alloc.ar_startblock); } STATIC int xfs_bnobt_recs_inorder( struct xfs_btree_cur *cur, const union xfs_btree_rec *r1, const union xfs_btree_rec *r2) { return be32_to_cpu(r1->alloc.ar_startblock) + be32_to_cpu(r1->alloc.ar_blockcount) <= be32_to_cpu(r2->alloc.ar_startblock); } STATIC int xfs_cntbt_keys_inorder( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2) { return be32_to_cpu(k1->alloc.ar_blockcount) < be32_to_cpu(k2->alloc.ar_blockcount) || (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && be32_to_cpu(k1->alloc.ar_startblock) < be32_to_cpu(k2->alloc.ar_startblock)); } STATIC int xfs_cntbt_recs_inorder( struct xfs_btree_cur *cur, const union xfs_btree_rec *r1, const union xfs_btree_rec *r2) { return 
be32_to_cpu(r1->alloc.ar_blockcount) < be32_to_cpu(r2->alloc.ar_blockcount) || (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && be32_to_cpu(r1->alloc.ar_startblock) < be32_to_cpu(r2->alloc.ar_startblock)); } STATIC enum xbtree_key_contig xfs_allocbt_keys_contiguous( struct xfs_btree_cur *cur, const union xfs_btree_key *key1, const union xfs_btree_key *key2, const union xfs_btree_key *mask) { ASSERT(!mask || mask->alloc.ar_startblock); return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock), be32_to_cpu(key2->alloc.ar_startblock)); } const struct xfs_btree_ops xfs_bnobt_ops = { .name = "bno", .type = XFS_BTREE_TYPE_AG, .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), .ptr_len = XFS_BTREE_SHORT_PTR_LEN, .lru_refs = XFS_ALLOC_BTREE_REF, .statoff = XFS_STATS_CALC_INDEX(xs_abtb_2), .sick_mask = XFS_SICK_AG_BNOBT, .dup_cursor = xfs_bnobt_dup_cursor, .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec, .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_bnobt_key_diff, .buf_ops = &xfs_bnobt_buf_ops, .diff_two_keys = xfs_bnobt_diff_two_keys, .keys_inorder = xfs_bnobt_keys_inorder, .recs_inorder = xfs_bnobt_recs_inorder, .keys_contiguous = xfs_allocbt_keys_contiguous, }; const struct xfs_btree_ops xfs_cntbt_ops = { .name = "cnt", .type = XFS_BTREE_TYPE_AG, .geom_flags = XFS_BTGEO_LASTREC_UPDATE, .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), .ptr_len = XFS_BTREE_SHORT_PTR_LEN, .lru_refs = XFS_ALLOC_BTREE_REF, .statoff = XFS_STATS_CALC_INDEX(xs_abtc_2), .sick_mask = XFS_SICK_AG_CNTBT, .dup_cursor = xfs_cntbt_dup_cursor, .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec, .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_cntbt_key_diff, .buf_ops = &xfs_cntbt_buf_ops, .diff_two_keys = xfs_cntbt_diff_two_keys, .keys_inorder = xfs_cntbt_keys_inorder, .recs_inorder = xfs_cntbt_recs_inorder, .keys_contiguous = NULL, /* not needed right now */ }; /* * Allocate a new bnobt cursor. * * For staging cursors tp and agbp are NULL. */ struct xfs_btree_cur * xfs_bnobt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag) { struct xfs_btree_cur *cur; cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops, mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); cur->bc_ag.pag = xfs_perag_hold(pag); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agf *agf = agbp->b_addr; cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level); } return cur; } /* * Allocate a new cntbt cursor. * * For staging cursors tp and agbp are NULL. 
*/ struct xfs_btree_cur * xfs_cntbt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag) { struct xfs_btree_cur *cur; cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops, mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); cur->bc_ag.pag = xfs_perag_hold(pag); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agf *agf = agbp->b_addr; cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level); } return cur; } /* * Install a new free space btree root. Caller is responsible for invalidating * and freeing the old btree blocks. */ void xfs_allocbt_commit_staged_btree( struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp) { struct xfs_agf *agf = agbp->b_addr; struct xbtree_afakeroot *afake = cur->bc_ag.afake; ASSERT(cur->bc_flags & XFS_BTREE_STAGING); if (xfs_btree_is_bno(cur->bc_ops)) { agf->agf_bno_root = cpu_to_be32(afake->af_root); agf->agf_bno_level = cpu_to_be32(afake->af_levels); } else { agf->agf_cnt_root = cpu_to_be32(afake->af_root); agf->agf_cnt_level = cpu_to_be32(afake->af_levels); } xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); xfs_btree_commit_afakeroot(cur, tp, agbp); } /* Calculate number of records in an alloc btree block. */ static inline unsigned int xfs_allocbt_block_maxrecs( unsigned int blocklen, bool leaf) { if (leaf) return blocklen / sizeof(xfs_alloc_rec_t); return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); } /* * Calculate number of records in an alloc btree block. */ int xfs_allocbt_maxrecs( struct xfs_mount *mp, int blocklen, int leaf) { blocklen -= XFS_ALLOC_BLOCK_LEN(mp); return xfs_allocbt_block_maxrecs(blocklen, leaf); } /* Free space btrees are at their largest when every other block is free. */ #define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2) /* Compute the max possible height for free space btrees. */ unsigned int xfs_allocbt_maxlevels_ondisk(void) { unsigned int minrecs[2]; unsigned int blocklen; blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2; minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2; return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS); } /* Calculate the freespace btree size for some records. */ xfs_extlen_t xfs_allocbt_calc_size( struct xfs_mount *mp, unsigned long long len) { return xfs_btree_calc_size(mp->m_alloc_mnr, len); } int __init xfs_allocbt_init_cur_cache(void) { xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur", xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()), 0, 0, NULL); if (!xfs_allocbt_cur_cache) return -ENOMEM; return 0; } void xfs_allocbt_destroy_cur_cache(void) { kmem_cache_destroy(xfs_allocbt_cur_cache); xfs_allocbt_cur_cache = NULL; }
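The sizing logic in xfs_allocbt_maxlevels_ondisk() above is a worst-case fan-out argument: with every other block free the tree holds at most (XFS_MAX_AG_BLOCKS + 1) / 2 records, and each level of interior nodes multiplies the guaranteed capacity by the minimum records per block. The standalone userspace sketch below illustrates that arithmetic only; it is not the kernel helper, and the block size, record/key/pointer sizes and record-count bound are assumptions that ignore block headers.

#include <stdio.h>

/* Worst-case (minimum-fill) height needed to hold "records" entries. */
static unsigned int example_max_height(unsigned long long records,
				       unsigned int min_leaf_recs,
				       unsigned int min_node_recs)
{
	unsigned long long capacity = min_leaf_recs;
	unsigned int height = 1;

	while (capacity < records) {
		capacity *= min_node_recs;	/* one more interior level */
		height++;
	}
	return height;
}

int main(void)
{
	/* Assumed geometry: 4096-byte blocks, 8-byte records and keys, 4-byte pointers. */
	unsigned int min_leaf = (4096 / 8) / 2;		/* minrecs = maxrecs / 2 */
	unsigned int min_node = (4096 / (8 + 4)) / 2;
	unsigned long long max_records = ((1ULL << 31) + 1) / 2; /* assumed AG block bound */

	printf("worst-case btree height: %u\n",
	       example_max_height(max_records, min_leaf, min_node));
	return 0;
}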
10 9 8 7 6 2 6 6 5 4 3 2 1 1 10 3 2 2 1 2 1 3 2 2 2 2 2 1 2 1 2 1 1 1 1 1 14 6 4 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016 Laura Garcia <nevola@gmail.com> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <linux/jhash.h> struct nft_jhash { u8 sreg; u8 dreg; u8 len; bool autogen_seed:1; u32 modulus; u32 seed; u32 offset; }; static void nft_jhash_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_jhash *priv = nft_expr_priv(expr); const void *data = &regs->data[priv->sreg]; u32 h; h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus); regs->data[priv->dreg] = h + priv->offset; } struct nft_symhash { u8 dreg; u32 modulus; u32 offset; }; static void nft_symhash_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_symhash *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; u32 h; h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); regs->data[priv->dreg] = h + priv->offset; } static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, [NFTA_HASH_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, [NFTA_HASH_TYPE] = { .type = NLA_U32 }, }; static int nft_jhash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_jhash *priv = nft_expr_priv(expr); u32 len; int err; if (!tb[NFTA_HASH_SREG] || !tb[NFTA_HASH_DREG] || !tb[NFTA_HASH_LEN] || !tb[NFTA_HASH_MODULUS]) return -EINVAL; if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); if (err < 0) return err; if (len == 0) return -ERANGE; priv->len = len; err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); if (err < 0) return err; priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; if (tb[NFTA_HASH_SEED]) { priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); } else { priv->autogen_seed = 
true; get_random_bytes(&priv->seed, sizeof(priv->seed)); } return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_symhash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_symhash *priv = nft_expr_priv(expr); if (!tb[NFTA_HASH_DREG] || !tb[NFTA_HASH_MODULUS]) return -EINVAL; if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_jhash_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_jhash *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) goto nla_put_failure; if (!priv->autogen_seed && nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; if (priv->offset != 0) if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static bool nft_jhash_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_jhash *priv = nft_expr_priv(expr); nft_reg_track_cancel(track, priv->dreg, sizeof(u32)); return false; } static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_symhash *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) goto nla_put_failure; if (priv->offset != 0) if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static bool nft_symhash_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { struct nft_symhash *priv = nft_expr_priv(expr); struct nft_symhash *symhash; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); return false; } symhash = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->offset != symhash->offset || priv->modulus != symhash->modulus) { nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); return false; } if (!track->regs[priv->dreg].bitwise) return true; return false; } static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)), .eval = nft_jhash_eval, .init = nft_jhash_init, .dump = nft_jhash_dump, .reduce = nft_jhash_reduce, }; static const struct nft_expr_ops nft_symhash_ops = { .type = &nft_hash_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), .eval = nft_symhash_eval, .init = nft_symhash_init, .dump = nft_symhash_dump, .reduce = nft_symhash_reduce, }; static const struct nft_expr_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { u32 type; if (!tb[NFTA_HASH_TYPE]) 
return &nft_jhash_ops; type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); switch (type) { case NFT_HASH_SYM: return &nft_symhash_ops; case NFT_HASH_JENKINS: return &nft_jhash_ops; default: break; } return ERR_PTR(-EOPNOTSUPP); } static struct nft_expr_type nft_hash_type __read_mostly = { .name = "hash", .select_ops = nft_hash_select_ops, .policy = nft_hash_policy, .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { return nft_register_expr(&nft_hash_type); } static void __exit nft_hash_module_exit(void) { nft_unregister_expr(&nft_hash_type); } module_init(nft_hash_module_init); module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>"); MODULE_ALIAS_NFT_EXPR("hash"); MODULE_DESCRIPTION("Netfilter nftables hash module");
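For reference, the register value computed by nft_jhash_eval() above is reciprocal_scale(jhash(data, len, seed), modulus) + offset, i.e. a 32-bit hash mapped onto [offset, offset + modulus - 1]. The userspace sketch below only illustrates that mapping; reciprocal_scale() is re-implemented here on the assumption that it scales via a 64-bit multiply and shift, and the hash value and parameters are made up.

#include <stdint.h>
#include <stdio.h>

/* Assumed behaviour of the kernel's reciprocal_scale(): map val onto [0, ep_ro). */
static uint32_t example_reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
	uint32_t hash = 0xdeadbeef;	/* stand-in for jhash(data, len, seed) */
	uint32_t modulus = 10;
	uint32_t offset = 100;
	uint32_t dreg = example_reciprocal_scale(hash, modulus) + offset;

	/* Always lands in [offset, offset + modulus - 1], here [100, 109]. */
	printf("dreg = %u\n", dreg);
	return 0;
}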
10 10 8 2 8 8 8 8 7 7 7 7 3 7 3 7 4 4 7 7 3 1 3 7 8 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 // SPDX-License-Identifier: GPL-2.0-only /* * dir.c * * PURPOSE * Directory handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT * (C) 1998-2004 Ben Fennema * * HISTORY * * 10/05/98 dgb Split directory operations into its own file * Implemented directory reads via do_udf_readdir * 10/06/98 Made directory operations work! * 11/17/98 Rewrote directory to support ICBTAG_FLAG_AD_LONG * 11/25/98 blf Rewrote directory handling (readdir+lookup) to support reading * across blocks. * 12/12/98 Split out the lookup code to namei.c. bulk of directory * code now in directory.c:udf_fileident_read. */ #include "udfdecl.h" #include <linux/string.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/bio.h> #include <linux/iversion.h> #include "udf_i.h" #include "udf_sb.h" static int udf_readdir(struct file *file, struct dir_context *ctx) { struct inode *dir = file_inode(file); loff_t nf_pos, emit_pos = 0; int flen; unsigned char *fname = NULL; int ret = 0; struct super_block *sb = dir->i_sb; bool pos_valid = false; struct udf_fileident_iter iter; if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) return 0; ctx->pos = 1; } nf_pos = (ctx->pos - 1) << 2; if (nf_pos >= dir->i_size) goto out; /* * Something changed since last readdir (either lseek was called or dir * changed)? We need to verify the position correctly points at the * beginning of some dir entry so that the directory parsing code does * not get confused. Since UDF does not have any reliable way of * identifying beginning of dir entry (names are under user control), * we need to scan the directory from the beginning. */ if (!inode_eq_iversion(dir, file->f_version)) { emit_pos = nf_pos; nf_pos = 0; } else { pos_valid = true; } fname = kmalloc(UDF_NAME_LEN, GFP_KERNEL); if (!fname) { ret = -ENOMEM; goto out; } for (ret = udf_fiiter_init(&iter, dir, nf_pos); !ret && iter.pos < dir->i_size; ret = udf_fiiter_advance(&iter)) { struct kernel_lb_addr tloc; udf_pblk_t iblock; /* Still not at offset where user asked us to read from? 
*/ if (iter.pos < emit_pos) continue; /* Update file position only if we got past the current one */ pos_valid = true; ctx->pos = (iter.pos >> 2) + 1; if (iter.fi.fileCharacteristics & FID_FILE_CHAR_DELETED) { if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE)) continue; } if (iter.fi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) { if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE)) continue; } if (iter.fi.fileCharacteristics & FID_FILE_CHAR_PARENT) { if (!dir_emit_dotdot(file, ctx)) goto out_iter; continue; } flen = udf_get_filename(sb, iter.name, iter.fi.lengthFileIdent, fname, UDF_NAME_LEN); if (flen < 0) continue; tloc = lelb_to_cpu(iter.fi.icb.extLocation); iblock = udf_get_lb_pblock(sb, &tloc, 0); if (!dir_emit(ctx, fname, flen, iblock, DT_UNKNOWN)) goto out_iter; } if (!ret) { ctx->pos = (iter.pos >> 2) + 1; pos_valid = true; } out_iter: udf_fiiter_release(&iter); out: if (pos_valid) file->f_version = inode_query_iversion(dir); kfree(fname); return ret; } /* readdir and lookup functions */ const struct file_operations udf_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = udf_readdir, .unlocked_ioctl = udf_ioctl, .fsync = generic_file_fsync, };
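A small, hedged aside on the position arithmetic in udf_readdir() above (the helper names are hypothetical): f_pos 0 is reserved for ".", and every other f_pos is a directory byte offset shifted down by 2, plus 1, so the two conversions are exact inverses for offsets that are multiples of 4.

#include <linux/types.h>

static inline loff_t example_udf_dir_offset_to_fpos(loff_t dir_offset)
{
	return (dir_offset >> 2) + 1;
}

static inline loff_t example_udf_fpos_to_dir_offset(loff_t f_pos)
{
	return (f_pos - 1) << 2;	/* only meaningful for f_pos >= 1 */
}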
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2013-2015 Intel Corporation.
All rights reserved. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/moduleparam.h> #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/ndctl.h> #include <linux/slab.h> #include <linux/io.h> #include <linux/fs.h> #include <linux/mm.h> #include "nd-core.h" #include "label.h" #include "pmem.h" #include "nd.h" static DEFINE_IDA(dimm_ida); /* * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands */ int nvdimm_check_config_data(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); if (!nvdimm->cmd_mask || !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) { if (test_bit(NDD_LABELING, &nvdimm->flags)) return -ENXIO; else return -ENOTTY; } return 0; } static int validate_dimm(struct nvdimm_drvdata *ndd) { int rc; if (!ndd) return -EINVAL; rc = nvdimm_check_config_data(ndd->dev); if (rc) dev_dbg(ndd->dev, "%ps: %s error: %d\n", __builtin_return_address(0), __func__, rc); return rc; } /** * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area * @ndd: dimm to initialize * * Returns: %0 if the area is already valid, -errno on error */ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) { struct nd_cmd_get_config_size *cmd = &ndd->nsarea; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); struct nvdimm_bus_descriptor *nd_desc; int rc = validate_dimm(ndd); int cmd_rc = 0; if (rc) return rc; if (cmd->config_size) return 0; /* already valid */ memset(cmd, 0, sizeof(*cmd)); nd_desc = nvdimm_bus->nd_desc; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), &cmd_rc); if (rc < 0) return rc; return cmd_rc; } int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf, size_t offset, size_t len) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; int rc = validate_dimm(ndd), cmd_rc = 0; struct nd_cmd_get_config_data_hdr *cmd; size_t max_cmd_size, buf_offset; if (rc) return rc; if (offset + len > ndd->nsarea.config_size) return -ENXIO; max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer); cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; for (buf_offset = 0; len; len -= cmd->in_length, buf_offset += cmd->in_length) { size_t cmd_size; cmd->in_offset = offset + buf_offset; cmd->in_length = min(max_cmd_size, len); cmd_size = sizeof(*cmd) + cmd->in_length; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, cmd_size, &cmd_rc); if (rc < 0) break; if (cmd_rc < 0) { rc = cmd_rc; break; } /* out_buf should be valid, copy it into our output buffer */ memcpy(buf + buf_offset, cmd->out_buf, cmd->in_length); } kvfree(cmd); return rc; } int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len) { size_t max_cmd_size, buf_offset; struct nd_cmd_set_config_hdr *cmd; int rc = validate_dimm(ndd), cmd_rc = 0; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; if (rc) return rc; if (offset + len > ndd->nsarea.config_size) return -ENXIO; max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer); cmd = kvzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); if (!cmd) return -ENOMEM; for (buf_offset = 0; len; len -= cmd->in_length, buf_offset += cmd->in_length) { size_t cmd_size; cmd->in_offset = offset + buf_offset; cmd->in_length = min(max_cmd_size, len); memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length); /* status is output in the last 
4-bytes of the command buffer */ cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, &cmd_rc); if (rc < 0) break; if (cmd_rc < 0) { rc = cmd_rc; break; } } kvfree(cmd); return rc; } void nvdimm_set_labeling(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); set_bit(NDD_LABELING, &nvdimm->flags); } void nvdimm_set_locked(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); set_bit(NDD_LOCKED, &nvdimm->flags); } void nvdimm_clear_locked(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); clear_bit(NDD_LOCKED, &nvdimm->flags); } static void nvdimm_release(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); ida_free(&dimm_ida, nvdimm->id); kfree(nvdimm); } struct nvdimm *to_nvdimm(struct device *dev) { struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev); WARN_ON(!is_nvdimm(dev)); return nvdimm; } EXPORT_SYMBOL_GPL(to_nvdimm); struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) { struct nvdimm *nvdimm = nd_mapping->nvdimm; WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); return dev_get_drvdata(&nvdimm->dev); } EXPORT_SYMBOL(to_ndd); void nvdimm_drvdata_release(struct kref *kref) { struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); struct device *dev = ndd->dev; struct resource *res, *_r; dev_dbg(dev, "trace\n"); nvdimm_bus_lock(dev); for_each_dpa_resource_safe(ndd, res, _r) nvdimm_free_dpa(ndd, res); nvdimm_bus_unlock(dev); kvfree(ndd->data); kfree(ndd); put_device(dev); } void get_ndd(struct nvdimm_drvdata *ndd) { kref_get(&ndd->kref); } void put_ndd(struct nvdimm_drvdata *ndd) { if (ndd) kref_put(&ndd->kref, nvdimm_drvdata_release); } const char *nvdimm_name(struct nvdimm *nvdimm) { return dev_name(&nvdimm->dev); } EXPORT_SYMBOL_GPL(nvdimm_name); struct kobject *nvdimm_kobj(struct nvdimm *nvdimm) { return &nvdimm->dev.kobj; } EXPORT_SYMBOL_GPL(nvdimm_kobj); unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) { return nvdimm->cmd_mask; } EXPORT_SYMBOL_GPL(nvdimm_cmd_mask); void *nvdimm_provider_data(struct nvdimm *nvdimm) { if (nvdimm) return nvdimm->provider_data; return NULL; } EXPORT_SYMBOL_GPL(nvdimm_provider_data); static ssize_t commands_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); int cmd, len = 0; if (!nvdimm->cmd_mask) return sprintf(buf, "\n"); for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; } static DEVICE_ATTR_RO(commands); static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); return sprintf(buf, "%s%s\n", test_bit(NDD_LABELING, &nvdimm->flags) ? "label " : "", test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : ""); } static DEVICE_ATTR_RO(flags); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); /* * The state may be in the process of changing, userspace should * quiesce probing if it wants a static answer */ nvdimm_bus_lock(dev); nvdimm_bus_unlock(dev); return sprintf(buf, "%s\n", atomic_read(&nvdimm->busy) ? 
"active" : "idle"); } static DEVICE_ATTR_RO(state); static ssize_t __available_slots_show(struct nvdimm_drvdata *ndd, char *buf) { struct device *dev; ssize_t rc; u32 nfree; if (!ndd) return -ENXIO; dev = ndd->dev; nvdimm_bus_lock(dev); nfree = nd_label_nfree(ndd); if (nfree - 1 > nfree) { dev_WARN_ONCE(dev, 1, "we ate our last label?\n"); nfree = 0; } else nfree--; rc = sprintf(buf, "%d\n", nfree); nvdimm_bus_unlock(dev); return rc; } static ssize_t available_slots_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t rc; device_lock(dev); rc = __available_slots_show(dev_get_drvdata(dev), buf); device_unlock(dev); return rc; } static DEVICE_ATTR_RO(available_slots); static ssize_t security_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); /* * For the test version we need to poll the "hardware" in order * to get the updated status for unlock testing. */ if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags)) return sprintf(buf, "overwrite\n"); if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) return sprintf(buf, "disabled\n"); if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) return sprintf(buf, "unlocked\n"); if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) return sprintf(buf, "locked\n"); return -ENOTTY; } static ssize_t frozen_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); return sprintf(buf, "%d\n", test_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags)); } static DEVICE_ATTR_RO(frozen); static ssize_t security_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { ssize_t rc; /* * Require all userspace triggered security management to be * done while probing is idle and the DIMM is not in active use * in any region. */ device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = nvdimm_security_store(dev, buf, len); nvdimm_bus_unlock(dev); device_unlock(dev); return rc; } static DEVICE_ATTR_RW(security); static struct attribute *nvdimm_attributes[] = { &dev_attr_state.attr, &dev_attr_flags.attr, &dev_attr_commands.attr, &dev_attr_available_slots.attr, &dev_attr_security.attr, &dev_attr_frozen.attr, NULL, }; static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); struct nvdimm *nvdimm = to_nvdimm(dev); if (a != &dev_attr_security.attr && a != &dev_attr_frozen.attr) return a->mode; if (!nvdimm->sec.flags) return 0; if (a == &dev_attr_security.attr) { /* Are there any state mutation ops (make writable)? 
*/ if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable || nvdimm->sec.ops->change_key || nvdimm->sec.ops->erase || nvdimm->sec.ops->overwrite) return a->mode; return 0444; } if (nvdimm->sec.ops->freeze) return a->mode; return 0; } static const struct attribute_group nvdimm_attribute_group = { .attrs = nvdimm_attributes, .is_visible = nvdimm_visible, }; static ssize_t result_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); enum nvdimm_fwa_result result; if (!nvdimm->fw_ops) return -EOPNOTSUPP; nvdimm_bus_lock(dev); result = nvdimm->fw_ops->activate_result(nvdimm); nvdimm_bus_unlock(dev); switch (result) { case NVDIMM_FWA_RESULT_NONE: return sprintf(buf, "none\n"); case NVDIMM_FWA_RESULT_SUCCESS: return sprintf(buf, "success\n"); case NVDIMM_FWA_RESULT_FAIL: return sprintf(buf, "fail\n"); case NVDIMM_FWA_RESULT_NOTSTAGED: return sprintf(buf, "not_staged\n"); case NVDIMM_FWA_RESULT_NEEDRESET: return sprintf(buf, "need_reset\n"); default: return -ENXIO; } } static DEVICE_ATTR_ADMIN_RO(result); static ssize_t activate_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); enum nvdimm_fwa_state state; if (!nvdimm->fw_ops) return -EOPNOTSUPP; nvdimm_bus_lock(dev); state = nvdimm->fw_ops->activate_state(nvdimm); nvdimm_bus_unlock(dev); switch (state) { case NVDIMM_FWA_IDLE: return sprintf(buf, "idle\n"); case NVDIMM_FWA_BUSY: return sprintf(buf, "busy\n"); case NVDIMM_FWA_ARMED: return sprintf(buf, "armed\n"); default: return -ENXIO; } } static ssize_t activate_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct nvdimm *nvdimm = to_nvdimm(dev); enum nvdimm_fwa_trigger arg; int rc; if (!nvdimm->fw_ops) return -EOPNOTSUPP; if (sysfs_streq(buf, "arm")) arg = NVDIMM_FWA_ARM; else if (sysfs_streq(buf, "disarm")) arg = NVDIMM_FWA_DISARM; else return -EINVAL; nvdimm_bus_lock(dev); rc = nvdimm->fw_ops->arm(nvdimm, arg); nvdimm_bus_unlock(dev); if (rc < 0) return rc; return len; } static DEVICE_ATTR_ADMIN_RW(activate); static struct attribute *nvdimm_firmware_attributes[] = { &dev_attr_activate.attr, &dev_attr_result.attr, NULL, }; static umode_t nvdimm_firmware_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; struct nvdimm *nvdimm = to_nvdimm(dev); enum nvdimm_fwa_capability cap; if (!nd_desc->fw_ops) return 0; if (!nvdimm->fw_ops) return 0; nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); nvdimm_bus_unlock(dev); if (cap < NVDIMM_FWA_CAP_QUIESCE) return 0; return a->mode; } static const struct attribute_group nvdimm_firmware_attribute_group = { .name = "firmware", .attrs = nvdimm_firmware_attributes, .is_visible = nvdimm_firmware_visible, }; static const struct attribute_group *nvdimm_attribute_groups[] = { &nd_device_attribute_group, &nvdimm_attribute_group, &nvdimm_firmware_attribute_group, NULL, }; static const struct device_type nvdimm_device_type = { .name = "nvdimm", .release = nvdimm_release, .groups = nvdimm_attribute_groups, }; bool is_nvdimm(const struct device *dev) { return dev->type == &nvdimm_device_type; } static struct lock_class_key nvdimm_key; struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, 
struct resource *flush_wpq, const char *dimm_id, const struct nvdimm_security_ops *sec_ops, const struct nvdimm_fw_ops *fw_ops) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; if (!nvdimm) return NULL; nvdimm->id = ida_alloc(&dimm_ida, GFP_KERNEL); if (nvdimm->id < 0) { kfree(nvdimm); return NULL; } nvdimm->dimm_id = dimm_id; nvdimm->provider_data = provider_data; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; nvdimm->num_flush = num_flush; nvdimm->flush_wpq = flush_wpq; atomic_set(&nvdimm->busy, 0); dev = &nvdimm->dev; dev_set_name(dev, "nmem%d", nvdimm->id); dev->parent = &nvdimm_bus->dev; dev->type = &nvdimm_device_type; dev->devt = MKDEV(nvdimm_major, nvdimm->id); dev->groups = groups; nvdimm->sec.ops = sec_ops; nvdimm->fw_ops = fw_ops; nvdimm->sec.overwrite_tmo = 0; INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query); /* * Security state must be initialized before device_add() for * attribute visibility. */ /* get security state and extended (master) state */ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); device_initialize(dev); lockdep_set_class(&dev->mutex, &nvdimm_key); if (test_bit(NDD_REGISTER_SYNC, &flags)) nd_device_register_sync(dev); else nd_device_register(dev); return nvdimm; } EXPORT_SYMBOL_GPL(__nvdimm_create); void nvdimm_delete(struct nvdimm *nvdimm) { struct device *dev = &nvdimm->dev; bool dev_put = false; /* We are shutting down. Make state frozen artificially. */ nvdimm_bus_lock(dev); set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags); if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags)) dev_put = true; nvdimm_bus_unlock(dev); cancel_delayed_work_sync(&nvdimm->dwork); if (dev_put) put_device(dev); nd_device_unregister(dev, ND_SYNC); } EXPORT_SYMBOL_GPL(nvdimm_delete); static void shutdown_security_notify(void *data) { struct nvdimm *nvdimm = data; sysfs_put(nvdimm->sec.overwrite_state); } int nvdimm_security_setup_events(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); if (!nvdimm->sec.flags || !nvdimm->sec.ops || !nvdimm->sec.ops->overwrite) return 0; nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security"); if (!nvdimm->sec.overwrite_state) return -ENOMEM; return devm_add_action_or_reset(dev, shutdown_security_notify, nvdimm); } EXPORT_SYMBOL_GPL(nvdimm_security_setup_events); int nvdimm_in_overwrite(struct nvdimm *nvdimm) { return test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); } EXPORT_SYMBOL_GPL(nvdimm_in_overwrite); int nvdimm_security_freeze(struct nvdimm *nvdimm) { int rc; WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze) return -EOPNOTSUPP; if (!nvdimm->sec.flags) return -EIO; if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { dev_warn(&nvdimm->dev, "Overwrite operation in progress.\n"); return -EBUSY; } rc = nvdimm->sec.ops->freeze(nvdimm); nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } static unsigned long dpa_align(struct nd_region *nd_region) { struct device *dev = &nd_region->dev; if (dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "bus lock required for capacity provision\n")) return 0; if (dev_WARN_ONCE(dev, !nd_region->ndr_mappings || nd_region->align % nd_region->ndr_mappings, "invalid region align %#lx mappings: %d\n", nd_region->align, nd_region->ndr_mappings)) return 0; return nd_region->align / nd_region->ndr_mappings; } /** * nd_pmem_max_contiguous_dpa - For the given dimm+region, 
return the max * contiguous unallocated dpa range. * @nd_region: constrain available space check to this reference region * @nd_mapping: container of dpa-resource-root + labels * * Returns: %0 if there is an alignment error, otherwise the max * unallocated dpa range */ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping) { struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nvdimm_bus *nvdimm_bus; resource_size_t max = 0; struct resource *res; unsigned long align; /* if a dimm is disabled the available capacity is zero */ if (!ndd) return 0; align = dpa_align(nd_region); if (!align) return 0; nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); if (__reserve_free_pmem(&nd_region->dev, nd_mapping->nvdimm)) return 0; for_each_dpa_resource(ndd, res) { resource_size_t start, end; if (strcmp(res->name, "pmem-reserve") != 0) continue; /* trim free space relative to current alignment setting */ start = ALIGN(res->start, align); end = ALIGN_DOWN(res->end + 1, align) - 1; if (end < start) continue; if (end - start + 1 > max) max = end - start + 1; } release_free_pmem(nvdimm_bus, nd_mapping); return max; } /** * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa * @nd_mapping: container of dpa-resource-root + labels * @nd_region: constrain available space check to this reference region * * Validate that a PMEM label, if present, aligns with the start of an * interleave set. * * Returns: %0 if there is an alignment error, otherwise the unallocated dpa */ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping) { struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); resource_size_t map_start, map_end, busy = 0; struct resource *res; unsigned long align; if (!ndd) return 0; align = dpa_align(nd_region); if (!align) return 0; map_start = nd_mapping->start; map_end = map_start + nd_mapping->size - 1; for_each_dpa_resource(ndd, res) { resource_size_t start, end; start = ALIGN_DOWN(res->start, align); end = ALIGN(res->end + 1, align) - 1; if (start >= map_start && start < map_end) { if (end > map_end) { nd_dbg_dpa(nd_region, ndd, res, "misaligned to iset\n"); return 0; } busy += end - start + 1; } else if (end >= map_start && end <= map_end) { busy += end - start + 1; } else if (map_start > start && map_start < end) { /* total eclipse of the mapping */ busy += nd_mapping->size; } } if (busy < nd_mapping->size) return ALIGN_DOWN(nd_mapping->size - busy, align); return 0; } void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) { WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); kfree(res->name); __release_region(&ndd->dpa, res->start, resource_size(res)); } struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id, resource_size_t start, resource_size_t n) { char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); struct resource *res; if (!name) return NULL; WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); res = __request_region(&ndd->dpa, start, n, name, 0); if (!res) kfree(name); return res; } /** * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id * @ndd: container of dpa-resource-root + labels * @label_id: dpa resource name of the form pmem-<human readable uuid> * * Returns: sum of the dpa allocated to the label_id */ resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id) { resource_size_t allocated = 0; struct resource *res; for_each_dpa_resource(ndd, res) if (strcmp(res->name, 
label_id->id) == 0) allocated += resource_size(res); return allocated; } static int count_dimms(struct device *dev, void *c) { int *count = c; if (is_nvdimm(dev)) (*count)++; return 0; } int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) { int count = 0; /* Flush any possible dimm registration failures */ nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); dev_dbg(&nvdimm_bus->dev, "count: %d\n", count); if (count != dimm_count) return -ENXIO; return 0; } EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); void __exit nvdimm_devs_exit(void) { ida_destroy(&dimm_ida); }
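The DPA accounting above (nd_pmem_available_dpa() and nvdimm_allocated_dpa()) boils down to walking a list of inclusive [start, end] resources, rounding each range out to the region alignment, and summing what is busy or what carries a given label id. The following is a minimal userspace sketch of that arithmetic only, not kernel code: the struct, the sample ranges, and the busy_dpa()/allocated_dpa() helpers are hypothetical, and the busy check is simplified to ranges fully inside the mapping.

#include <stdio.h>
#include <string.h>

#define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))	/* a must be a power of two */
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))

struct dpa_res {
	unsigned long long start, end;	/* inclusive range, like struct resource */
	const char *name;		/* label id, e.g. "pmem-<uuid>" */
};

/* Busy space, rounded out to @align, counting only ranges inside the mapping.
 * The kernel code above additionally handles partial overlaps. */
static unsigned long long busy_dpa(const struct dpa_res *res, int nr,
				   unsigned long long map_start,
				   unsigned long long map_size,
				   unsigned long long align)
{
	unsigned long long busy = 0;
	int i;

	for (i = 0; i < nr; i++) {
		unsigned long long start = ALIGN_DOWN(res[i].start, align);
		unsigned long long end = ALIGN_UP(res[i].end + 1, align) - 1;

		if (start >= map_start && end < map_start + map_size)
			busy += end - start + 1;
	}
	return busy;
}

/* Like nvdimm_allocated_dpa(): sum of ranges carrying a given label id. */
static unsigned long long allocated_dpa(const struct dpa_res *res, int nr,
					const char *label)
{
	unsigned long long sum = 0;
	int i;

	for (i = 0; i < nr; i++)
		if (strcmp(res[i].name, label) == 0)
			sum += res[i].end - res[i].start + 1;
	return sum;
}

int main(void)
{
	struct dpa_res res[] = {
		{ 0x0000000, 0x0ffffff, "pmem-11111111" },
		{ 0x1000000, 0x13fffff, "pmem-22222222" },
	};
	unsigned long long map_size = 0x4000000, align = 0x200000;
	unsigned long long busy = busy_dpa(res, 2, 0, map_size, align);

	/* remaining capacity is reported rounded down to the alignment */
	printf("available: %#llx\n", ALIGN_DOWN(map_size - busy, align));
	printf("allocated to pmem-11111111: %#llx\n",
	       allocated_dpa(res, 2, "pmem-11111111"));
	return 0;
}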
// SPDX-License-Identifier: GPL-2.0-or-later /* * vimc-capture.c Virtual Media Controller Driver * * Copyright (C) 2015-2017 Helen Koike <helen.fornazier@gmail.com> */ #include <media/v4l2-ioctl.h> #include <media/videobuf2-core.h> #include <media/videobuf2-dma-contig.h> #include <media/videobuf2-vmalloc.h> #include "vimc-common.h" #include "vimc-streamer.h" struct vimc_capture_device { struct vimc_ent_device ved; struct video_device vdev; struct v4l2_pix_format format; struct vb2_queue queue; struct list_head buf_list; /* * NOTE: in a real driver, a spin lock must be used to access the * queue because the frames are generated from a hardware interrupt * and the isr is not allowed to sleep.
* Even if it is not necessary a spinlock in the vimc driver, we * use it here as a code reference */ spinlock_t qlock; struct mutex lock; u32 sequence; struct vimc_stream stream; struct media_pad pad; }; static const struct v4l2_pix_format fmt_default = { .width = 640, .height = 480, .pixelformat = V4L2_PIX_FMT_RGB24, .field = V4L2_FIELD_NONE, .colorspace = V4L2_COLORSPACE_SRGB, }; struct vimc_capture_buffer { /* * struct vb2_v4l2_buffer must be the first element * the videobuf2 framework will allocate this struct based on * buf_struct_size and use the first sizeof(struct vb2_buffer) bytes of * memory as a vb2_buffer */ struct vb2_v4l2_buffer vb2; struct list_head list; }; static int vimc_capture_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, VIMC_PDEV_NAME, sizeof(cap->driver)); strscpy(cap->card, KBUILD_MODNAME, sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", VIMC_PDEV_NAME); return 0; } static void vimc_capture_get_format(struct vimc_ent_device *ved, struct v4l2_pix_format *fmt) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); *fmt = vcapture->format; } static int vimc_capture_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); f->fmt.pix = vcapture->format; return 0; } static int vimc_capture_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format *format = &f->fmt.pix; const struct vimc_pix_map *vpix; format->width = clamp_t(u32, format->width, VIMC_FRAME_MIN_WIDTH, VIMC_FRAME_MAX_WIDTH) & ~1; format->height = clamp_t(u32, format->height, VIMC_FRAME_MIN_HEIGHT, VIMC_FRAME_MAX_HEIGHT) & ~1; /* Don't accept a pixelformat that is not on the table */ vpix = vimc_pix_map_by_pixelformat(format->pixelformat); if (!vpix) { format->pixelformat = fmt_default.pixelformat; vpix = vimc_pix_map_by_pixelformat(format->pixelformat); } /* TODO: Add support for custom bytesperline values */ format->bytesperline = format->width * vpix->bpp; format->sizeimage = format->bytesperline * format->height; if (format->field == V4L2_FIELD_ANY) format->field = fmt_default.field; vimc_colorimetry_clamp(format); if (format->colorspace == V4L2_COLORSPACE_DEFAULT) format->colorspace = fmt_default.colorspace; return 0; } static int vimc_capture_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); int ret; /* Do not change the format while stream is on */ if (vb2_is_busy(&vcapture->queue)) return -EBUSY; ret = vimc_capture_try_fmt_vid_cap(file, priv, f); if (ret) return ret; dev_dbg(vcapture->ved.dev, "%s: format update: " "old:%dx%d (0x%x, %d, %d, %d, %d) " "new:%dx%d (0x%x, %d, %d, %d, %d)\n", vcapture->vdev.name, /* old */ vcapture->format.width, vcapture->format.height, vcapture->format.pixelformat, vcapture->format.colorspace, vcapture->format.quantization, vcapture->format.xfer_func, vcapture->format.ycbcr_enc, /* new */ f->fmt.pix.width, f->fmt.pix.height, f->fmt.pix.pixelformat, f->fmt.pix.colorspace, f->fmt.pix.quantization, f->fmt.pix.xfer_func, f->fmt.pix.ycbcr_enc); vcapture->format = f->fmt.pix; return 0; } static int vimc_capture_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { const struct vimc_pix_map *vpix; if (f->mbus_code) { if (f->index > 0) return -EINVAL; vpix = vimc_pix_map_by_code(f->mbus_code); } else { vpix = vimc_pix_map_by_index(f->index); } if (!vpix) 
return -EINVAL; f->pixelformat = vpix->pixelformat; return 0; } static int vimc_capture_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { const struct vimc_pix_map *vpix; if (fsize->index) return -EINVAL; /* Only accept code in the pix map table */ vpix = vimc_pix_map_by_code(fsize->pixel_format); if (!vpix) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; fsize->stepwise.min_width = VIMC_FRAME_MIN_WIDTH; fsize->stepwise.max_width = VIMC_FRAME_MAX_WIDTH; fsize->stepwise.min_height = VIMC_FRAME_MIN_HEIGHT; fsize->stepwise.max_height = VIMC_FRAME_MAX_HEIGHT; fsize->stepwise.step_width = 1; fsize->stepwise.step_height = 1; return 0; } static const struct v4l2_file_operations vimc_capture_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = vb2_fop_mmap, }; static const struct v4l2_ioctl_ops vimc_capture_ioctl_ops = { .vidioc_querycap = vimc_capture_querycap, .vidioc_g_fmt_vid_cap = vimc_capture_g_fmt_vid_cap, .vidioc_s_fmt_vid_cap = vimc_capture_s_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vimc_capture_try_fmt_vid_cap, .vidioc_enum_fmt_vid_cap = vimc_capture_enum_fmt_vid_cap, .vidioc_enum_framesizes = vimc_capture_enum_framesizes, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_prepare_buf = vb2_ioctl_prepare_buf, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, }; static void vimc_capture_return_all_buffers(struct vimc_capture_device *vcapture, enum vb2_buffer_state state) { struct vimc_capture_buffer *vbuf, *node; spin_lock(&vcapture->qlock); list_for_each_entry_safe(vbuf, node, &vcapture->buf_list, list) { list_del(&vbuf->list); vb2_buffer_done(&vbuf->vb2.vb2_buf, state); } spin_unlock(&vcapture->qlock); } static int vimc_capture_start_streaming(struct vb2_queue *vq, unsigned int count) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); int ret; vcapture->sequence = 0; /* Start the media pipeline */ ret = video_device_pipeline_start(&vcapture->vdev, &vcapture->stream.pipe); if (ret) { vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } ret = vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 1); if (ret) { video_device_pipeline_stop(&vcapture->vdev); vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } return 0; } /* * Stop the stream engine. Any remaining buffers in the stream queue are * dequeued and passed on to the vb2 framework marked as STATE_ERROR. 
*/ static void vimc_capture_stop_streaming(struct vb2_queue *vq) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 0); /* Stop the media pipeline */ video_device_pipeline_stop(&vcapture->vdev); /* Release all active buffers */ vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_ERROR); } static void vimc_capture_buf_queue(struct vb2_buffer *vb2_buf) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb2_buf->vb2_queue); struct vimc_capture_buffer *buf = container_of(vb2_buf, struct vimc_capture_buffer, vb2.vb2_buf); spin_lock(&vcapture->qlock); list_add_tail(&buf->list, &vcapture->buf_list); spin_unlock(&vcapture->qlock); } static int vimc_capture_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); if (*nplanes) return sizes[0] < vcapture->format.sizeimage ? -EINVAL : 0; /* We don't support multiplanes for now */ *nplanes = 1; sizes[0] = vcapture->format.sizeimage; return 0; } static int vimc_capture_buffer_prepare(struct vb2_buffer *vb) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb->vb2_queue); unsigned long size = vcapture->format.sizeimage; if (vb2_plane_size(vb, 0) < size) { dev_err(vcapture->ved.dev, "%s: buffer too small (%lu < %lu)\n", vcapture->vdev.name, vb2_plane_size(vb, 0), size); return -EINVAL; } return 0; } static const struct vb2_ops vimc_capture_qops = { .start_streaming = vimc_capture_start_streaming, .stop_streaming = vimc_capture_stop_streaming, .buf_queue = vimc_capture_buf_queue, .queue_setup = vimc_capture_queue_setup, .buf_prepare = vimc_capture_buffer_prepare, /* * Since q->lock is set we can use the standard * vb2_ops_wait_prepare/finish helper functions. 
*/ .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; static const struct media_entity_operations vimc_capture_mops = { .link_validate = vimc_vdev_link_validate, }; static void vimc_capture_release(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); media_entity_cleanup(vcapture->ved.ent); kfree(vcapture); } static void vimc_capture_unregister(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); vb2_video_unregister_device(&vcapture->vdev); } static void *vimc_capture_process_frame(struct vimc_ent_device *ved, const void *frame) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); struct vimc_capture_buffer *vimc_buf; void *vbuf; spin_lock(&vcapture->qlock); /* Get the first entry of the list */ vimc_buf = list_first_entry_or_null(&vcapture->buf_list, typeof(*vimc_buf), list); if (!vimc_buf) { spin_unlock(&vcapture->qlock); return ERR_PTR(-EAGAIN); } /* Remove this entry from the list */ list_del(&vimc_buf->list); spin_unlock(&vcapture->qlock); /* Fill the buffer */ vimc_buf->vb2.vb2_buf.timestamp = ktime_get_ns(); vimc_buf->vb2.sequence = vcapture->sequence++; vimc_buf->vb2.field = vcapture->format.field; vbuf = vb2_plane_vaddr(&vimc_buf->vb2.vb2_buf, 0); memcpy(vbuf, frame, vcapture->format.sizeimage); /* Set it as ready */ vb2_set_plane_payload(&vimc_buf->vb2.vb2_buf, 0, vcapture->format.sizeimage); vb2_buffer_done(&vimc_buf->vb2.vb2_buf, VB2_BUF_STATE_DONE); return NULL; } static struct vimc_ent_device *vimc_capture_add(struct vimc_device *vimc, const char *vcfg_name) { struct v4l2_device *v4l2_dev = &vimc->v4l2_dev; const struct vimc_pix_map *vpix; struct vimc_capture_device *vcapture; struct video_device *vdev; struct vb2_queue *q; int ret; /* Allocate the vimc_capture_device struct */ vcapture = kzalloc(sizeof(*vcapture), GFP_KERNEL); if (!vcapture) return ERR_PTR(-ENOMEM); /* Initialize the media entity */ vcapture->vdev.entity.name = vcfg_name; vcapture->vdev.entity.function = MEDIA_ENT_F_IO_V4L; vcapture->pad.flags = MEDIA_PAD_FL_SINK; ret = media_entity_pads_init(&vcapture->vdev.entity, 1, &vcapture->pad); if (ret) goto err_free_vcapture; /* Initialize the lock */ mutex_init(&vcapture->lock); /* Initialize the vb2 queue */ q = &vcapture->queue; q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; q->io_modes = VB2_MMAP | VB2_DMABUF; if (vimc_allocator == VIMC_ALLOCATOR_VMALLOC) q->io_modes |= VB2_USERPTR; q->drv_priv = vcapture; q->buf_struct_size = sizeof(struct vimc_capture_buffer); q->ops = &vimc_capture_qops; q->mem_ops = vimc_allocator == VIMC_ALLOCATOR_DMA_CONTIG ? 
&vb2_dma_contig_memops : &vb2_vmalloc_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_queued_buffers = 2; q->lock = &vcapture->lock; q->dev = v4l2_dev->dev; ret = vb2_queue_init(q); if (ret) { dev_err(vimc->mdev.dev, "%s: vb2 queue init failed (err=%d)\n", vcfg_name, ret); goto err_clean_m_ent; } /* Initialize buffer list and its lock */ INIT_LIST_HEAD(&vcapture->buf_list); spin_lock_init(&vcapture->qlock); /* Set default frame format */ vcapture->format = fmt_default; vpix = vimc_pix_map_by_pixelformat(vcapture->format.pixelformat); vcapture->format.bytesperline = vcapture->format.width * vpix->bpp; vcapture->format.sizeimage = vcapture->format.bytesperline * vcapture->format.height; /* Fill the vimc_ent_device struct */ vcapture->ved.ent = &vcapture->vdev.entity; vcapture->ved.process_frame = vimc_capture_process_frame; vcapture->ved.vdev_get_format = vimc_capture_get_format; vcapture->ved.dev = vimc->mdev.dev; /* Initialize the video_device struct */ vdev = &vcapture->vdev; vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_IO_MC; vdev->entity.ops = &vimc_capture_mops; vdev->release = video_device_release_empty; vdev->fops = &vimc_capture_fops; vdev->ioctl_ops = &vimc_capture_ioctl_ops; vdev->lock = &vcapture->lock; vdev->queue = q; vdev->v4l2_dev = v4l2_dev; vdev->vfl_dir = VFL_DIR_RX; strscpy(vdev->name, vcfg_name, sizeof(vdev->name)); video_set_drvdata(vdev, &vcapture->ved); /* Register the video_device with the v4l2 and the media framework */ ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (ret) { dev_err(vimc->mdev.dev, "%s: video register failed (err=%d)\n", vcapture->vdev.name, ret); goto err_clean_m_ent; } return &vcapture->ved; err_clean_m_ent: media_entity_cleanup(&vcapture->vdev.entity); err_free_vcapture: kfree(vcapture); return ERR_PTR(ret); } struct vimc_ent_type vimc_capture_type = { .add = vimc_capture_add, .unregister = vimc_capture_unregister, .release = vimc_capture_release };
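The comment in struct vimc_capture_buffer above leans on a common kernel pattern: the wrapper struct embeds the framework's buffer as its first member, videobuf2 allocates buf_struct_size bytes, and the driver recovers its wrapper with container_of() in buf_queue() and process_frame(). A minimal userspace sketch of just that pattern follows; the struct names here (fake_vb2_buffer, my_capture_buffer) are made up for illustration.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_vb2_buffer {		/* stands in for struct vb2_v4l2_buffer */
	int index;
};

struct my_capture_buffer {		/* driver wrapper, like vimc_capture_buffer */
	struct fake_vb2_buffer vb2;	/* must stay the first member */
	int driver_private;
};

int main(void)
{
	/* the "framework" allocates sizeof(struct my_capture_buffer) bytes */
	struct my_capture_buffer *buf = calloc(1, sizeof(*buf));
	struct fake_vb2_buffer *vb = &buf->vb2;	/* what the callbacks receive */

	/* driver side: climb back from the embedded member to the wrapper */
	struct my_capture_buffer *again =
		container_of(vb, struct my_capture_buffer, vb2);

	printf("recovered wrapper: %s\n", again == buf ? "yes" : "no");
	free(buf);
	return 0;
}

Because vb2 is the first member, the two pointers are numerically equal here; container_of() is still the idiomatic way to express the conversion and keeps working if the member ever moves.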
// SPDX-License-Identifier: GPL-2.0-only /* * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 * Home page: * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC * This is from the implementation of CUBIC TCP in * Sangtae Ha, Injong Rhee and Lisong Xu, * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" * in ACM SIGOPS Operating System Review, July 2008. * Available from: * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf * * CUBIC integrates a new slow start algorithm, called HyStart. * The details of HyStart are presented in * Sangtae Ha and Injong Rhee, * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. * Available from: * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf * * All testing results are available from: * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing * * Unless CUBIC is enabled and congestion window is large * this behaves the same as the original Reno.
*/ #include <linux/mm.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta */ #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ /* Two methods of hybrid slow start */ #define HYSTART_ACK_TRAIN 0x1 #define HYSTART_DELAY 0x2 /* Number of delay samples for detecting the increase of delay */ #define HYSTART_MIN_SAMPLES 8 #define HYSTART_DELAY_MIN (4000U) /* 4 ms */ #define HYSTART_DELAY_MAX (16000U) /* 16 ms */ #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) static int fast_convergence __read_mostly = 1; static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; static int hystart __read_mostly = 1; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_low_window __read_mostly = 16; static int hystart_ack_delta_us __read_mostly = 2000; static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; static u64 cube_factor __read_mostly; /* Note parameters that are used for precomputing scale factors are read-only */ module_param(fast_convergence, int, 0644); MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "beta for multiplicative increase"); module_param(initial_ssthresh, int, 0644); MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(bic_scale, int, 0444); MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); module_param(tcp_friendliness, int, 0644); MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); module_param(hystart, int, 0644); MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); module_param(hystart_detect, int, 0644); MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); module_param(hystart_ack_delta_us, int, 0644); MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)"); /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ u32 bic_K; /* time to origin point from the beginning of the current epoch */ u32 delay_min; /* min delay (usec) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ u16 unused; u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? 
*/ u32 round_start; /* beginning of each round */ u32 end_seq; /* end_seq of the round */ u32 last_ack; /* last time when the ACK spacing is close */ u32 curr_rtt; /* the minimum rtt of current round */ }; static inline void bictcp_reset(struct bictcp *ca) { memset(ca, 0, offsetof(struct bictcp, unused)); ca->found = 0; } static inline u32 bictcp_clock_us(const struct sock *sk) { return tcp_sk(sk)->tcp_mstamp; } static inline void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->round_start = ca->last_ack = bictcp_clock_us(sk); ca->end_seq = tp->snd_nxt; ca->curr_rtt = ~0U; ca->sample_cnt = 0; } __bpf_kfunc static void cubictcp_init(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); if (hystart) bictcp_hystart_reset(sk); if (!hystart && initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } __bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); u32 now = tcp_jiffies32; s32 delta; delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. * Shift epoch_start to keep cwnd growth to cubic curve. */ if (ca->epoch_start && delta > 0) { ca->epoch_start += delta; if (after(ca->epoch_start, now)) ca->epoch_start = now; } return; } } /* calculate the cubic root of x using a table lookup followed by one * Newton-Raphson iteration. * Avg err ~= 0.195% */ static u32 cubic_root(u64 a) { u32 x, b, shift; /* * cbrt(x) MSB values for x MSB values in [0..63]. * Precomputed then refined by hand - Willy Tarreau * * For x in [0..63], * v = cbrt(x << 18) - 1 * cbrt(x) = (v[x] + 10) >> 6 */ static const u8 v[] = { /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, }; b = fls64(a); if (b < 7) { /* a in [0..63] */ return ((u32)v[(u32)a] + 35) >> 6; } b = ((b * 84) >> 8) - 1; shift = (a >> (b * 3)); x = ((u32)(((u32)v[shift] + 10) << b)) >> 6; /* * Newton-Raphson iteration * 2 * x = ( 2 * x + a / x ) / 3 * k+1 k k */ x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1))); x = ((x * 341) >> 10); return x; } /* * Compute congestion window to use. */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) { u32 delta, bic_target, max_cnt; u64 offs, t; ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; /* The CUBIC function can update ca->cnt at most once per jiffy. * On all cwnd reduction events, ca->epoch_start is set to 0, * which will force a recalculation of ca->cnt. 
*/ if (ca->epoch_start && tcp_jiffies32 == ca->last_time) goto tcp_friendliness; ca->last_cwnd = cwnd; ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) { ca->epoch_start = tcp_jiffies32; /* record beginning */ ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ if (ca->last_max_cwnd <= cwnd) { ca->bic_K = 0; ca->bic_origin_point = cwnd; } else { /* Compute new K based on * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) */ ca->bic_K = cubic_root(cube_factor * (ca->last_max_cwnd - cwnd)); ca->bic_origin_point = ca->last_max_cwnd; } } /* cubic function - calc*/ /* calculate c * time^3 / rtt, * while considering overflow in calculation of time^3 * (so time^3 is done by using 64 bit) * and without the support of division of 64bit numbers * (so all divisions are done by using 32 bit) * also NOTE the unit of those veriables * time = (t - K) / 2^bictcp_HZ * c = bic_scale >> 10 * rtt = (srtt >> 3) / HZ * !!! The following code does not have overflow problems, * if the cwnd < 1 million packets !!! */ t = (s32)(tcp_jiffies32 - ca->epoch_start); t += usecs_to_jiffies(ca->delay_min); /* change the unit from HZ to bictcp_HZ */ t <<= BICTCP_HZ; do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; else offs = t - ca->bic_K; /* c/rtt * (t-K)^3 */ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); if (t < ca->bic_K) /* below origin*/ bic_target = ca->bic_origin_point - delta; else /* above origin*/ bic_target = ca->bic_origin_point + delta; /* cubic function - calc bictcp_cnt*/ if (bic_target > cwnd) { ca->cnt = cwnd / (bic_target - cwnd); } else { ca->cnt = 100 * cwnd; /* very small increment*/ } /* * The initial growth of cubic function may be too conservative * when the available bandwidth is still unknown. */ if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ tcp_friendliness: /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; delta = (cwnd * scale) >> 3; while (ca->ack_cnt > delta) { /* update tcp cwnd */ ca->ack_cnt -= delta; ca->tcp_cwnd++; } if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ delta = ca->tcp_cwnd - cwnd; max_cnt = cwnd / delta; if (ca->cnt > max_cnt) ca->cnt = max_cnt; } } /* The maximum rate of cwnd increase CUBIC allows is 1 packet per * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. */ ca->cnt = max(ca->cnt, 2U); } __bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } bictcp_update(ca, tcp_snd_cwnd(tp), acked); tcp_cong_avoid_ai(tp, ca->cnt, acked); } __bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else ca->last_max_cwnd = tcp_snd_cwnd(tp); return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } __bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); bictcp_hystart_reset(sk); } } /* Account for TSO/GRO delays. 
* Otherwise short RTT flows could get too small ssthresh, since during * slow start we begin with small TSO packets and ca->delay_min would * not account for long aggregation delay when TSO packets get bigger. * Ideally even with a very small RTT we would like to have at least one * TSO packet being sent and received by GRO, and another one in qdisc layer. * We apply another 100% factor because @rate is doubled at this point. * We cap the cushion to 1ms. */ static u32 hystart_ack_delay(const struct sock *sk) { unsigned long rate; rate = READ_ONCE(sk->sk_pacing_rate); if (!rate) return 0; return min_t(u64, USEC_PER_MSEC, div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate)); } static void hystart_update(struct sock *sk, u32 delay) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 threshold; if (after(tp->snd_una, ca->end_seq)) bictcp_hystart_reset(sk); if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock_us(sk); /* first detection parameter - ack-train detection */ if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) { ca->last_ack = now; threshold = ca->delay_min + hystart_ack_delay(sk); /* Hystart ack train triggers if we get ack past * ca->delay_min/2. * Pacing might have delayed packets up to RTT/2 * during slow start. */ if (sk->sk_pacing_status == SK_PACING_NONE) threshold >>= 1; if ((s32)(now - ca->round_start) > threshold) { ca->found = 1; pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n", now - ca->round_start, threshold, ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp)); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ if (ca->curr_rtt > delay) ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { ca->found = 1; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } } __bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; /* Discard delay samples right after fast recovery */ if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) return; delay = sample->rtt_us; if (delay == 0) delay = 1; /* first time call or link delay decreases */ if (ca->delay_min == 0 || ca->delay_min > delay) ca->delay_min = delay; /* hystart triggers when cwnd is larger than some threshold */ if (!ca->found && tcp_in_slow_start(tp) && hystart && tcp_snd_cwnd(tp) >= hystart_low_window) hystart_update(sk, delay); } static struct tcp_congestion_ops cubictcp __read_mostly = { .init = cubictcp_init, .ssthresh = cubictcp_recalc_ssthresh, .cong_avoid = cubictcp_cong_avoid, .set_state = cubictcp_state, .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = cubictcp_cwnd_event, .pkts_acked = cubictcp_acked, .owner = THIS_MODULE, .name = "cubic", }; BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) BTF_ID_FLAGS(func, cubictcp_cong_avoid) 
BTF_ID_FLAGS(func, cubictcp_state) BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, .set = &tcp_cubic_check_kfunc_ids, }; static int __init cubictcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms */ beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 / (BICTCP_BETA_SCALE - beta); cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 * so K = cubic_root( (wmax-cwnd)*rtt/c ) * the unit of K is bictcp_HZ=2^10, not HZ * * c = bic_scale >> 10 * rtt = 100ms * * the following code has been designed and tested for * cwnd < 1 million packets * RTT < 100 seconds * HZ < 1,000,00 (corresponding to 10 nano-second) */ /* 1/c * 2^2*bictcp_HZ * srtt */ cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */ /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); if (ret < 0) return ret; return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { tcp_unregister_congestion_control(&cubictcp); } module_init(cubictcp_register); module_exit(cubictcp_unregister); MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CUBIC TCP"); MODULE_VERSION("2.3");
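bictcp_update() above evaluates the CUBIC growth curve in fixed point (BICTCP_HZ time units, with cube_factor and cube_rtt_scale precomputed in cubictcp_register()). Stripped of the scaling, the curve is W(t) = C*(t - K)^3 + W_max with K = cbrt(W_max*(1 - beta)/C), so W(0) is roughly the post-loss window beta*W_max and W(K) returns to W_max. The floating-point userspace sketch below is only illustrative: it uses the CUBIC paper's units of seconds rather than jiffies, the module defaults beta = 717/1024 and C ≈ 0.4 (bic_scale*10/1024), and a made-up W_max of 100 packets.

#include <math.h>
#include <stdio.h>

int main(void)
{
	double C = 0.4;			/* bic_scale = 41 -> 41*10/1024 ~= 0.4 */
	double beta = 717.0 / 1024.0;	/* remaining fraction after a loss */
	double w_max = 100.0;		/* cwnd at the last loss, in packets */
	double K = cbrt(w_max * (1.0 - beta) / C);
	double t;

	printf("K = %.2f s (time to climb back to w_max)\n", K);
	for (t = 0.0; t <= 2.0 * K; t += K / 4.0)
		printf("t=%5.2f  W(t)=%6.2f\n",
		       t, C * pow(t - K, 3.0) + w_max);	/* cubic window curve */
	return 0;
}

Compile with -lm. The flat region around t = K sits at W_max, the window where the last congestion event happened; CUBIC lingers there before probing beyond it, which is exactly what the bic_target/ca->cnt computation above reproduces in integer math.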
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright Red Hat Inc. 2017 * * This file is part of the SCTP kernel implementation * * These functions manipulate sctp stream queue/scheduling. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> */ #include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> /* First Come First Serve (a.k.a. FIFO) * RFC DRAFT ndata Section 3.1 */ static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid, __u16 value, gfp_t gfp) { return 0; } static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid, __u16 *value) { *value = 0; return 0; } static int sctp_sched_fcfs_init(struct sctp_stream *stream) { return 0; } static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { return 0; } static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) { } static void sctp_sched_fcfs_enqueue(struct sctp_outq *q, struct sctp_datamsg *msg) { } static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q) { struct sctp_stream *stream = &q->asoc->stream; struct sctp_chunk *ch = NULL; struct list_head *entry; if (list_empty(&q->out_chunk_list)) goto out; if (stream->out_curr) { ch = list_entry(stream->out_curr->ext->outq.next, struct sctp_chunk, stream_list); } else { entry = q->out_chunk_list.next; ch = list_entry(entry, struct sctp_chunk, list); } sctp_sched_dequeue_common(q, ch); out: return ch; } static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q, struct sctp_chunk *chunk) { } static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream) { } static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream) { } static struct sctp_sched_ops sctp_sched_fcfs = { .set = sctp_sched_fcfs_set, .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, .free_sid = sctp_sched_fcfs_free_sid, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, .dequeue_done = sctp_sched_fcfs_dequeue_done, .sched_all = sctp_sched_fcfs_sched_all, .unsched_all = sctp_sched_fcfs_unsched_all, }; static void sctp_sched_ops_fcfs_init(void) { sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs); } /*
API to other parts of the stack */ static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; void sctp_sched_ops_register(enum sctp_sched_type sched, struct sctp_sched_ops *sched_ops) { sctp_sched_ops[sched] = sched_ops; } void sctp_sched_ops_init(void) { sctp_sched_ops_fcfs_init(); sctp_sched_ops_prio_init(); sctp_sched_ops_rr_init(); sctp_sched_ops_fc_init(); sctp_sched_ops_wfq_init(); } static void sctp_sched_free_sched(struct sctp_stream *stream) { struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); struct sctp_stream_out_ext *soute; int i; sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) { soute = SCTP_SO(stream, i)->ext; if (!soute) continue; sched->free_sid(stream, i); /* Give the next scheduler a clean slate. */ memset_after(soute, 0, outq); } } int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) { struct sctp_sched_ops *old = asoc->outqueue.sched; struct sctp_datamsg *msg = NULL; struct sctp_sched_ops *n; struct sctp_chunk *ch; int i, ret = 0; if (sched > SCTP_SS_MAX) return -EINVAL; n = sctp_sched_ops[sched]; if (old == n) return ret; if (old) sctp_sched_free_sched(&asoc->stream); asoc->outqueue.sched = n; n->init(&asoc->stream); for (i = 0; i < asoc->stream.outcnt; i++) { if (!SCTP_SO(&asoc->stream, i)->ext) continue; ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); if (ret) goto err; } /* We have to requeue all chunks already queued. */ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { if (ch->msg == msg) continue; msg = ch->msg; n->enqueue(&asoc->outqueue, msg); } return ret; err: sctp_sched_free_sched(&asoc->stream); asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */ return ret; } int sctp_sched_get_sched(struct sctp_association *asoc) { int i; for (i = 0; i <= SCTP_SS_MAX; i++) if (asoc->outqueue.sched == sctp_sched_ops[i]) return i; return 0; } int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, __u16 value, gfp_t gfp) { if (sid >= asoc->stream.outcnt) return -EINVAL; if (!SCTP_SO(&asoc->stream, sid)->ext) { int ret; ret = sctp_stream_init_ext(&asoc->stream, sid); if (ret) return ret; } return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp); } int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, __u16 *value) { if (sid >= asoc->stream.outcnt) return -EINVAL; if (!SCTP_SO(&asoc->stream, sid)->ext) return 0; return asoc->outqueue.sched->get(&asoc->stream, sid, value); } void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) { if (!list_is_last(&ch->frag_list, &ch->msg->chunks) && !q->asoc->peer.intl_capable) { struct sctp_stream_out *sout; __u16 sid; /* datamsg is not finish, so save it as current one, * in case application switch scheduler or a higher * priority stream comes in. 
*/ sid = sctp_chunk_stream_no(ch); sout = SCTP_SO(&q->asoc->stream, sid); q->asoc->stream.out_curr = sout; return; } q->asoc->stream.out_curr = NULL; q->sched->dequeue_done(q, ch); } /* Auxiliary functions for the schedulers */ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) { list_del_init(&ch->list); list_del_init(&ch->stream_list); q->out_qlen -= ch->skb->len; } int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); struct sctp_stream_out_ext *ext = SCTP_SO(stream, sid)->ext; INIT_LIST_HEAD(&ext->outq); return sched->init_sid(stream, sid, gfp); } struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) { struct sctp_association *asoc; asoc = container_of(stream, struct sctp_association, stream); return asoc->outqueue.sched; }
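sctp_sched_ops_register() and sctp_sched_set_sched() above implement a small pluggable-policy framework: every scheduler exports a struct sctp_sched_ops of function pointers, the ops tables sit in an array indexed by enum sctp_sched_type, and switching policies means swapping the pointer and re-running init/enqueue. A stripped-down userspace sketch of that registration pattern follows; all names in it are hypothetical.

#include <stdio.h>

enum sched_type { SCHED_FCFS, SCHED_PRIO, SCHED_MAX = SCHED_PRIO };

struct sched_ops {
	const char *name;
	void (*enqueue)(int item);
};

static void fcfs_enqueue(int item) { printf("fcfs: queued %d\n", item); }
static void prio_enqueue(int item) { printf("prio: queued %d\n", item); }

static const struct sched_ops fcfs_ops = { "fcfs", fcfs_enqueue };
static const struct sched_ops prio_ops = { "prio", prio_enqueue };

/* registry indexed by the enum, like sctp_sched_ops[] above */
static const struct sched_ops *registry[SCHED_MAX + 1];

static void sched_register(enum sched_type t, const struct sched_ops *ops)
{
	registry[t] = ops;
}

int main(void)
{
	const struct sched_ops *cur;

	sched_register(SCHED_FCFS, &fcfs_ops);
	sched_register(SCHED_PRIO, &prio_ops);

	cur = registry[SCHED_FCFS];	/* default policy */
	cur->enqueue(1);

	cur = registry[SCHED_PRIO];	/* switch policy at runtime */
	cur->enqueue(2);
	return 0;
}

The kernel version has the extra work the sketch omits: on a switch it frees the old scheduler's per-stream state, initializes the new one, and requeues every datamsg already sitting in the outqueue, falling back to FCFS on error.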
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H #include <linux/sched/coredump.h> #include <linux/mm_types.h> #include <linux/fs.h> /* only for vma_is_dax() */ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); void huge_pmd_set_accessed(struct vm_fault *vmf); int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, struct vm_area_struct *vma); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); #else static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) { } #endif vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf); bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long next); int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr); int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr); bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t
*old_pmd, pmd_t *new_pmd); int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_UNSUPPORTED, TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, }; struct kobject; struct kobj_attribute; ssize_t single_hugepage_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag flag); ssize_t single_hugepage_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) /* * Mask of all large folio orders supported for anonymous THP; all orders up to * and including PMD_ORDER, except order-0 (which is not "huge") and order-1 * (which is a limitation of the THP implementation). */ #define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) /* * Mask of all large folio orders supported for file THP. */ #define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) /* * Mask of all large folio orders supported for THP. */ #define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) #define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \ (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order))) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1)) #define HPAGE_PUD_SHIFT PUD_SHIFT #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) extern unsigned long transparent_hugepage_flags; extern unsigned long huge_anon_orders_always; extern unsigned long huge_anon_orders_madvise; extern unsigned long huge_anon_orders_inherit; static inline bool hugepage_global_enabled(void) { return transparent_hugepage_flags & ((1<<TRANSPARENT_HUGEPAGE_FLAG) | (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)); } static inline bool hugepage_global_always(void) { return transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG); } static inline bool hugepage_flags_enabled(void) { /* * We cover both the anon and the file-backed case here; we must return * true if globally enabled, even when all anon sizes are set to never. * So we don't need to look at huge_anon_orders_inherit. */ return hugepage_global_enabled() || huge_anon_orders_always || huge_anon_orders_madvise; } static inline int highest_order(unsigned long orders) { return fls_long(orders) - 1; } static inline int next_order(unsigned long *orders, int prev) { *orders &= ~BIT(prev); return highest_order(*orders); } /* * Do the below checks: * - For file vma, check if the linear page offset of vma is * order-aligned within the file. 
The hugepage is * guaranteed to be order-aligned within the file, but we must * check that the order-aligned addresses in the VMA map to * order-aligned offsets within the file, else the hugepage will * not be mappable. * - For all vmas, check if the haddr is in an aligned hugepage * area. */ static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, unsigned long addr, int order) { unsigned long hpage_size = PAGE_SIZE << order; unsigned long haddr; /* Don't have to check pgoff for anonymous vma */ if (!vma_is_anonymous(vma)) { if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, hpage_size >> PAGE_SHIFT)) return false; } haddr = ALIGN_DOWN(addr, hpage_size); if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end) return false; return true; } /* * Filter the bitfield of input orders to the ones suitable for use in the vma. * See thp_vma_suitable_order(). * All orders that pass the checks are returned as a bitfield. */ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long addr, unsigned long orders) { int order; /* * Iterate over orders, highest to lowest, removing orders that don't * meet alignment requirements from the set. Exit loop at first order * that meets requirements, since all lower orders must also meet * requirements. */ order = highest_order(orders); while (orders) { if (thp_vma_suitable_order(vma, addr, order)) break; order = next_order(&orders, order); } return orders; } static inline bool file_thp_enabled(struct vm_area_struct *vma) { struct inode *inode; if (!vma->vm_file) return false; inode = vma->vm_file->f_inode; return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, bool smaps, bool in_pf, bool enforce_sysfs, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags * @smaps: whether answer will be used for smaps file * @in_pf: whether answer will be used by page fault handler * @enforce_sysfs: whether sysfs config should be taken into account * @orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed * hugepage orders for the provided vma. Permitted orders are encoded as a set * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3 * corresponds to order-3, etc). Order-0 is never considered a hugepage order. * * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage * orders are allowed. */ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, bool smaps, bool in_pf, bool enforce_sysfs, unsigned long orders) { /* Optimization to check if required orders are enabled early. 
*/ if (enforce_sysfs && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) mask |= READ_ONCE(huge_anon_orders_madvise); if (hugepage_global_always() || ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled())) mask |= READ_ONCE(huge_anon_orders_inherit); orders &= mask; if (!orders) return 0; } return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, orders); } #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); static inline int split_huge_page(struct page *page) { return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false, NULL); \ } while (0) void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct folio *folio); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ if (pud_trans_huge(*____pud) \ || pud_devmap(*____pud)) \ __split_huge_pud(__vma, __pud, __address); \ } while (0) int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); static inline int is_swap_pmd(pmd_t pmd) { return !pmd_none(pmd) && !pmd_present(pmd); } /* mmap_lock must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { if (pud_trans_huge(*pud) || pud_devmap(*pud)) return __pud_trans_huge_lock(pud, vma); else return NULL; } /** * folio_test_pmd_mappable - Can we map this folio with a PMD? 
* @folio: The folio to test */ static inline bool folio_test_pmd_mappable(struct folio *folio) { return folio_order(folio) >= HPAGE_PMD_ORDER; } struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap); vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct page *huge_zero_page; extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_page(struct page *page) { return READ_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) { return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } static inline bool is_huge_zero_pud(pud_t pud) { return false; } struct page *mm_get_huge_zero_page(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) static inline bool folio_test_pmd_mappable(struct folio *folio) { return false; } static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, unsigned long addr, int order) { return false; } static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long addr, unsigned long orders) { return 0; } static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, bool smaps, bool in_pf, bool enforce_sysfs, unsigned long orders) { return 0; } static inline void folio_prep_large_rmappable(struct folio *folio) {} #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL static inline bool can_split_folio(struct folio *folio, int *pextra_pins) { return false; } static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order) { return 0; } static inline int split_huge_page(struct page *page) { return 0; } static inline void deferred_split_folio(struct folio *folio) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct folio *folio) {} #define split_huge_pud(__vma, __pmd, __address) \ do { } while (0) static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { return -EINVAL; } static inline int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { return -EINVAL; } static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next) { } static inline int is_swap_pmd(pmd_t pmd) { return 0; } static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { return NULL; } static inline vm_fault_t do_huge_pmd_numa_page(struct 
vm_fault *vmf) { return 0; } static inline bool is_huge_zero_page(struct page *page) { return false; } static inline bool is_huge_zero_pmd(pmd_t pmd) { return false; } static inline bool is_huge_zero_pud(pud_t pud) { return false; } static inline void mm_put_huge_zero_page(struct mm_struct *mm) { return; } static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap) { return NULL; } static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap) { return NULL; } static inline bool thp_migration_supported(void) { return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, struct list_head *list, int new_order) { return split_huge_page_to_list_to_order(&folio->page, list, new_order); } static inline int split_folio_to_order(struct folio *folio, int new_order) { return split_folio_to_list_to_order(folio, NULL, new_order); } #define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) #define split_folio(f) split_folio_to_order(f, 0) /* * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to * limitations in the implementation like arm64 MTE can override this to * false */ #ifndef arch_thp_swp_supported static inline bool arch_thp_swp_supported(void) { return true; } #endif #endif /* _LINUX_HUGE_MM_H */
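/*
 * Illustrative user-space sketch (not part of the header above): it mimics
 * the order-bitfield convention used by thp_vma_suitable_orders() and
 * thp_vma_allowable_orders(), where a set bit N means "order N is a
 * candidate".  local_highest_order()/local_next_order() are invented
 * stand-ins for the kernel's highest_order()/next_order() helpers, and a
 * 4 KiB base page plus 64-bit unsigned long are assumed.
 */
#include <stdio.h>

#define LOCAL_BIT(n)	(1UL << (n))

static int local_highest_order(unsigned long orders)
{
	/* index of the most significant set bit, -1 if the set is empty */
	return orders ? 63 - __builtin_clzl(orders) : -1;
}

static int local_next_order(unsigned long *orders, int prev)
{
	*orders &= ~LOCAL_BIT(prev);	/* drop the order just examined */
	return local_highest_order(*orders);
}

int main(void)
{
	/* candidate orders 9 (PMD-sized with 4 KiB pages), 4 and 2 */
	unsigned long orders = LOCAL_BIT(9) | LOCAL_BIT(4) | LOCAL_BIT(2);
	int order;

	/* walk highest to lowest, the same shape as thp_vma_suitable_orders() */
	for (order = local_highest_order(orders); orders;
	     order = local_next_order(&orders, order))
		printf("would try order %d (%lu KiB)\n", order, 4UL << order);

	return 0;
}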
/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. */ #ifndef BTRFS_LOCKING_H #define BTRFS_LOCKING_H #include <linux/atomic.h> #include <linux/wait.h> #include <linux/lockdep.h> #include <linux/percpu_counter.h> #include "extent_io.h" #include "locking.h" struct extent_buffer; struct btrfs_path; struct btrfs_root; #define BTRFS_WRITE_LOCK 1 #define BTRFS_READ_LOCK 2 /* * We are limited in number of subclasses by MAX_LOCKDEP_SUBCLASSES, which at * the time of this patch is 8, which is how many we use. Keep this in mind if * you decide you want to add another subclass. */ enum btrfs_lock_nesting { BTRFS_NESTING_NORMAL, /* * When we COW a block we are holding the lock on the original block, * and since our lockdep maps are rootid+level, this confuses lockdep * when we lock the newly allocated COW'd block. Handle this by having * a subclass for COW'ed blocks so that lockdep doesn't complain. */ BTRFS_NESTING_COW, /* * Oftentimes we need to lock adjacent nodes on the same level while * still holding the lock on the original node we searched to, such as * for searching forward or for split/balance. * * Because of this we need to indicate to lockdep that this is * acceptable by having a different subclass for each of these * operations. */ BTRFS_NESTING_LEFT, BTRFS_NESTING_RIGHT, /* * When splitting we will be holding a lock on the left/right node when * we need to cow that node, thus we need a new set of subclasses for * these two operations. */ BTRFS_NESTING_LEFT_COW, BTRFS_NESTING_RIGHT_COW, /* * When splitting we may push nodes to the left or right, but still use * the subsequent nodes in our path, keeping our locks on those adjacent * blocks. Thus when we go to allocate a new split block we've already * used up all of our available subclasses, so this subclass exists to * handle this case where we need to allocate a new split block. */ BTRFS_NESTING_SPLIT, /* * When promoting a new block to a root we need to have a special * subclass so we don't confuse lockdep, as it will appear that we are * locking a higher level node before a lower level one. Copying also * has this problem as it appears we're locking the same block again * when we make a snapshot of an existing root. */ BTRFS_NESTING_NEW_ROOT, /* * We are limited to MAX_LOCKDEP_SUBCLASSES number of subclasses, so * add this in here and add a static_assert to keep us from going over * the limit. As of this writing we're limited to 8, and we're * definitely using 8, hence this check to keep us from messing up in * the future.
*/ BTRFS_NESTING_MAX, }; enum btrfs_lockdep_trans_states { BTRFS_LOCKDEP_TRANS_COMMIT_PREP, BTRFS_LOCKDEP_TRANS_UNBLOCKED, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED, BTRFS_LOCKDEP_TRANS_COMPLETED, }; /* * Lockdep annotation for wait events. * * @owner: The struct where the lockdep map is defined * @lock: The lockdep map corresponding to a wait event * * This macro is used to annotate a wait event. In this case a thread acquires * the lockdep map as writer (exclusive lock) because it has to block until all * the threads that hold the lock as readers signal the condition for the wait * event and release their locks. */ #define btrfs_might_wait_for_event(owner, lock) \ do { \ rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_); \ rwsem_release(&owner->lock##_map, _THIS_IP_); \ } while (0) /* * Protection for the resource/condition of a wait event. * * @owner: The struct where the lockdep map is defined * @lock: The lockdep map corresponding to a wait event * * Many threads can modify the condition for the wait event at the same time * and signal the threads that block on the wait event. The threads that modify * the condition and do the signaling acquire the lock as readers (shared * lock). */ #define btrfs_lockdep_acquire(owner, lock) \ rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_) /* * Used after signaling the condition for a wait event to release the lockdep * map held by a reader thread. */ #define btrfs_lockdep_release(owner, lock) \ rwsem_release(&owner->lock##_map, _THIS_IP_) /* * Macros for the transaction states wait events, similar to the generic wait * event macros. */ #define btrfs_might_wait_for_state(owner, i) \ do { \ rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \ rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_); \ } while (0) #define btrfs_trans_state_lockdep_acquire(owner, i) \ rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_) #define btrfs_trans_state_lockdep_release(owner, i) \ rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_) /* Initialization of the lockdep map */ #define btrfs_lockdep_init_map(owner, lock) \ do { \ static struct lock_class_key lock##_key; \ lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0); \ } while (0) /* Initialization of the transaction states lockdep maps. 
*/ #define btrfs_state_lockdep_init_map(owner, lock, state) \ do { \ static struct lock_class_key lock##_key; \ lockdep_init_map(&owner->btrfs_state_change_map[state], #lock, \ &lock##_key, 0); \ } while (0) static_assert(BTRFS_NESTING_MAX <= MAX_LOCKDEP_SUBCLASSES, "too many lock subclasses defined"); void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest); void btrfs_tree_lock(struct extent_buffer *eb); void btrfs_tree_unlock(struct extent_buffer *eb); void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest); void btrfs_tree_read_lock(struct extent_buffer *eb); void btrfs_tree_read_unlock(struct extent_buffer *eb); int btrfs_try_tree_read_lock(struct extent_buffer *eb); int btrfs_try_tree_write_lock(struct extent_buffer *eb); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root); #ifdef CONFIG_BTRFS_DEBUG static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { lockdep_assert_held_write(&eb->lock); } #else static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { } #endif void btrfs_unlock_up_safe(struct btrfs_path *path, int level); static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) { if (rw == BTRFS_WRITE_LOCK) btrfs_tree_unlock(eb); else if (rw == BTRFS_READ_LOCK) btrfs_tree_read_unlock(eb); else BUG(); } struct btrfs_drew_lock { atomic_t readers; atomic_t writers; wait_queue_head_t pending_writers; wait_queue_head_t pending_readers; }; void btrfs_drew_lock_init(struct btrfs_drew_lock *lock); void btrfs_drew_write_lock(struct btrfs_drew_lock *lock); bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock); void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock); void btrfs_drew_read_lock(struct btrfs_drew_lock *lock); void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level); void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb); #else static inline void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level) { } static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb) { } #endif #endif
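/*
 * Illustrative sketch only: how the "drew" lock API declared above is
 * typically paired.  As implemented in fs/btrfs/locking.c (not shown here),
 * many holders of the write side may run concurrently with each other, as
 * may many holders of the read side, but the two sides exclude one another.
 * The structure and function names (my_zone, my_zone_relocate,
 * my_zone_write) are invented, and the fragment only builds in-tree.
 */
struct my_zone {
	struct btrfs_drew_lock lock;
	/* ... data whose two usage phases must not overlap ... */
};

static void my_zone_init(struct my_zone *zone)
{
	btrfs_drew_lock_init(&zone->lock);
}

/* Write side: waits until no read-side holders remain. */
static void my_zone_relocate(struct my_zone *zone)
{
	btrfs_drew_write_lock(&zone->lock);
	/* ... work that must not overlap the read-side phase ... */
	btrfs_drew_write_unlock(&zone->lock);
}

/* Read side: excluded while any write-side holder is active. */
static void my_zone_write(struct my_zone *zone)
{
	btrfs_drew_read_lock(&zone->lock);
	/* ... work that must not overlap the write-side phase ... */
	btrfs_drew_read_unlock(&zone->lock);
}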
// SPDX-License-Identifier: GPL-2.0 /* * Handle caching attributes in page tables (PAT) * * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> * Suresh B Siddha <suresh.b.siddha@intel.com> * * Interval tree used to store the PAT memory type reservations. */ #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/interval_tree_generic.h> #include <linux/sched.h> #include <linux/gfp.h> #include <linux/pgtable.h> #include <asm/memtype.h> #include "memtype.h" /* * The memtype tree keeps track of memory type for specific * physical memory areas. Without proper tracking, conflicting memory * types in different mappings can cause CPU cache corruption. * * The tree is an interval tree (augmented rbtree) which is ordered * by the starting address. The tree can contain multiple entries for * different regions which overlap. All the aliases have the same * cache attributes of course, as enforced by the PAT logic. * * memtype_lock protects the rbtree. */ static inline u64 interval_start(struct memtype *entry) { return entry->start; } static inline u64 interval_end(struct memtype *entry) { return entry->end - 1; } INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, interval_start, interval_end, static, interval) static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; enum { MEMTYPE_EXACT_MATCH = 0, MEMTYPE_END_MATCH = 1 }; static struct memtype *memtype_match(u64 start, u64 end, int match_type) { struct memtype *entry_match; entry_match = interval_iter_first(&memtype_rbroot, start, end-1); while (entry_match != NULL && entry_match->start < end) { if ((match_type == MEMTYPE_EXACT_MATCH) && (entry_match->start == start) && (entry_match->end == end)) return entry_match; if ((match_type == MEMTYPE_END_MATCH) && (entry_match->start < start) && (entry_match->end == end)) return entry_match; entry_match = interval_iter_next(entry_match, start, end-1); } return NULL; /* Returns NULL if there is no match */ } static int memtype_check_conflict(u64 start, u64 end, enum page_cache_mode reqtype, enum page_cache_mode *newtype) { struct memtype *entry_match; enum page_cache_mode found_type = reqtype; entry_match = interval_iter_first(&memtype_rbroot, start, end-1); if (entry_match == NULL) goto success; if (entry_match->type != found_type && newtype == NULL) goto failure; dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end); found_type = entry_match->type; entry_match = interval_iter_next(entry_match, start, end-1); while (entry_match) { if (entry_match->type != found_type) goto failure; entry_match = interval_iter_next(entry_match, start, end-1); } success: if (newtype) *newtype = found_type; return 0; failure: pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", current->comm, current->pid, start, end,
cattr_name(found_type), cattr_name(entry_match->type)); return -EBUSY; } int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type) { int err = 0; err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type); if (err) return err; if (ret_type) entry_new->type = *ret_type; interval_insert(entry_new, &memtype_rbroot); return 0; } struct memtype *memtype_erase(u64 start, u64 end) { struct memtype *entry_old; /* * Since the memtype_rbroot tree allows overlapping ranges, * memtype_erase() checks with EXACT_MATCH first, i.e. free * a whole node for the munmap case. If no such entry is found, * it then checks with END_MATCH, i.e. shrink the size of a node * from the end for the mremap case. */ entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); if (!entry_old) { entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); if (!entry_old) return ERR_PTR(-EINVAL); } if (entry_old->start == start) { /* munmap: erase this node */ interval_remove(entry_old, &memtype_rbroot); } else { /* mremap: update the end value of this node */ interval_remove(entry_old, &memtype_rbroot); entry_old->end = start; interval_insert(entry_old, &memtype_rbroot); return NULL; } return entry_old; } struct memtype *memtype_lookup(u64 addr) { return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1); } /* * Debugging helper, copy the Nth entry of the tree into a * a copy for printout. This allows us to print out the tree * via debugfs, without holding the memtype_lock too long: */ #ifdef CONFIG_DEBUG_FS int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos) { struct memtype *entry_match; int i = 1; entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); while (entry_match && pos != i) { entry_match = interval_iter_next(entry_match, 0, ULONG_MAX); i++; } if (entry_match) { /* pos == i */ *entry_out = *entry_match; return 0; } else { return 1; } } #endif
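/*
 * Illustrative sketch only: how the helpers above are driven by
 * arch/x86/mm/pat/memtype.c.  The caller allocates a struct memtype, fills
 * in the range and requested cache mode, and inserts it under memtype_lock
 * (a spinlock private to memtype.c).  The function name is invented, error
 * handling is trimmed, and the snippet only builds in-tree.
 */
static int example_reserve_wc(u64 start, u64 end)
{
	struct memtype *entry;
	enum page_cache_mode actual;
	int err;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->start = start;
	entry->end   = end;
	entry->type  = _PAGE_CACHE_MODE_WC;

	spin_lock(&memtype_lock);
	/* may downgrade the request to 'actual' if an overlapping alias exists */
	err = memtype_check_insert(entry, &actual);
	spin_unlock(&memtype_lock);

	if (err)
		kfree(entry);
	return err;
}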
// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "truncate.h" struct io_ftrunc { struct file *file; loff_t len; }; int io_ftruncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc); if (sqe->rw_flags || sqe->addr || sqe->len || sqe->buf_index || sqe->splice_fd_in || sqe->addr3) return -EINVAL; ft->len = READ_ONCE(sqe->off); req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags) { struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_ftruncate(req->file, ft->len, 1); io_req_set_res(req, ret, 0); return IOU_OK; }
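/*
 * Illustrative user-space sketch: submitting the IORING_OP_FTRUNCATE request
 * handled above.  As io_ftruncate_prep() shows, the new length travels in
 * sqe->off, so the generic liburing prep helper is used here with the length
 * in the offset slot.  Assumes liburing is installed and that the installed
 * headers already define IORING_OP_FTRUNCATE; recent liburing also provides
 * an io_uring_prep_ftruncate() convenience wrapper.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fd, ret;

	fd = open("testfile", O_CREAT | O_RDWR, 0644);
	if (fd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	/* opcode, sqe, fd, addr, len, off -- the new file length goes in 'off' */
	io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, NULL, 0, 4096);

	io_uring_submit(&ring);
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret)
		printf("ftruncate result: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}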
// SPDX-License-Identifier: GPL-2.0-only /* * ebtable_nat * * Authors: * Bart De Schuymer <bdschuym@pandora.be> * * April, 2002 * */ #include <linux/netfilter_bridge/ebtables.h> #include <uapi/linux/netfilter_bridge.h> #include <linux/module.h> #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ (1 << NF_BR_POST_ROUTING)) static struct ebt_entries initial_chains[] = { { .name = "PREROUTING", .policy = EBT_ACCEPT, }, { .name = "OUTPUT", .policy = EBT_ACCEPT, }, { .name = "POSTROUTING", .policy = EBT_ACCEPT, } }; static struct ebt_replace_kernel initial_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), .hook_entry = { [NF_BR_PRE_ROUTING] = &initial_chains[0], [NF_BR_LOCAL_OUT] = &initial_chains[1], [NF_BR_POST_ROUTING] = &initial_chains[2], }, .entries = (char *)initial_chains, }; static const struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, .me = THIS_MODULE, }; static const struct nf_hook_ops ebt_ops_nat[] = { { .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, }, }; static int frame_nat_table_init(struct net *net) { return ebt_register_table(net, &frame_nat, ebt_ops_nat); } static void __net_exit frame_nat_net_pre_exit(struct net *net) { ebt_unregister_table_pre_exit(net, "nat"); } static void __net_exit frame_nat_net_exit(struct net *net) { ebt_unregister_table(net, "nat"); } static struct pernet_operations frame_nat_net_ops = { .exit = frame_nat_net_exit, .pre_exit = frame_nat_net_pre_exit, }; static int __init ebtable_nat_init(void) { int ret = ebt_register_template(&frame_nat, frame_nat_table_init); if (ret) return ret; ret = register_pernet_subsys(&frame_nat_net_ops); if (ret) { ebt_unregister_template(&frame_nat); return ret; } return ret; } static void __exit ebtable_nat_fini(void) { unregister_pernet_subsys(&frame_nat_net_ops); ebt_unregister_template(&frame_nat); } module_init(ebtable_nat_init); module_exit(ebtable_nat_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ebtables legacy stateless nat table");
// SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2010 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> */ #include <linux/module.h> #include <linux/fs.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/posix_acl_xattr.h> #include "xattr.h" #include "acl.h" #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" static struct posix_acl *v9fs_fid_get_acl(struct p9_fid *fid, const char *name) { ssize_t size; void *value = NULL; struct posix_acl *acl = NULL; size = v9fs_fid_xattr_get(fid, name, NULL, 0); if (size < 0) return ERR_PTR(size); if (size == 0) return ERR_PTR(-ENODATA); value = kzalloc(size, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); size = v9fs_fid_xattr_get(fid, name, value, size); if (size < 0) acl = ERR_PTR(size); else if (size == 0) acl = ERR_PTR(-ENODATA); else acl = posix_acl_from_xattr(&init_user_ns, value, size); kfree(value); return acl; } static struct posix_acl *v9fs_acl_get(struct dentry *dentry, const char *name) { struct p9_fid *fid; struct posix_acl *acl = NULL; fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); acl = v9fs_fid_get_acl(fid, name); p9_fid_put(fid); return acl; } static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, const char *name) { int retval; struct posix_acl *acl = NULL; acl = v9fs_fid_get_acl(fid, name); if (!IS_ERR(acl)) return acl; retval = PTR_ERR(acl); if (retval == -ENODATA || retval == -ENOSYS || retval == -EOPNOTSUPP) return NULL; /* map everything else to -EIO */ return ERR_PTR(-EIO); } int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) { int retval = 0; struct posix_acl *pacl, *dacl; struct v9fs_session_info *v9ses; v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); return 0; } /* get the default/access acl values and cache them */ dacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_DEFAULT); pacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_ACCESS); if (!IS_ERR(dacl) && !IS_ERR(pacl)) { set_cached_acl(inode,
ACL_TYPE_DEFAULT, dacl); set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); } else retval = -EIO; if (!IS_ERR(dacl)) posix_acl_release(dacl); if (!IS_ERR(pacl)) posix_acl_release(pacl); return retval; } static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) { struct posix_acl *acl; /* * 9p Always cache the acl value when * instantiating the inode (v9fs_inode_from_fid) */ acl = get_cached_acl(inode, type); BUG_ON(is_uncached_acl(acl)); return acl; } struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu) { struct v9fs_session_info *v9ses; if (rcu) return ERR_PTR(-ECHILD); v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { /* * On access = client and acl = on mode get the acl * values from the server */ return NULL; } return v9fs_get_cached_acl(inode, type); } struct posix_acl *v9fs_iop_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { struct v9fs_session_info *v9ses; v9ses = v9fs_dentry2v9ses(dentry); /* We allow set/get/list of acl when access=client is not specified. */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) return v9fs_acl_get(dentry, posix_acl_xattr_name(type)); return v9fs_get_cached_acl(d_inode(dentry), type); } int v9fs_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int retval; size_t size = 0; void *value = NULL; const char *acl_name; struct v9fs_session_info *v9ses; struct inode *inode = d_inode(dentry); if (acl) { retval = posix_acl_valid(inode->i_sb->s_user_ns, acl); if (retval) goto err_out; size = posix_acl_xattr_size(acl->a_count); value = kzalloc(size, GFP_NOFS); if (!value) { retval = -ENOMEM; goto err_out; } retval = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (retval < 0) goto err_out; } /* * set the attribute on the remote. Without even looking at the * xattr value. We leave it to the server to validate */ acl_name = posix_acl_xattr_name(type); v9ses = v9fs_dentry2v9ses(dentry); if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { retval = v9fs_xattr_set(dentry, acl_name, value, size, 0); goto err_out; } if (S_ISLNK(inode->i_mode)) { retval = -EOPNOTSUPP; goto err_out; } if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) { retval = -EPERM; goto err_out; } switch (type) { case ACL_TYPE_ACCESS: if (acl) { struct iattr iattr = {}; struct posix_acl *acl_mode = acl; retval = posix_acl_update_mode(&nop_mnt_idmap, inode, &iattr.ia_mode, &acl_mode); if (retval) goto err_out; if (!acl_mode) { /* * ACL can be represented by the mode bits. * So don't update ACL below. */ kfree(value); value = NULL; size = 0; } iattr.ia_valid = ATTR_MODE; /* * FIXME should we update ctime ? * What is the following setxattr update the mode ? */ v9fs_vfs_setattr_dotl(&nop_mnt_idmap, dentry, &iattr); } break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) { retval = acl ? 
-EINVAL : 0; goto err_out; } break; } retval = v9fs_xattr_set(dentry, acl_name, value, size, 0); if (!retval) set_cached_acl(inode, type, acl); err_out: kfree(value); return retval; } static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl) { int retval; char *name; size_t size; void *buffer; if (!acl) return 0; /* Set a setxattr request to server */ size = posix_acl_xattr_size(acl->a_count); buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return -ENOMEM; retval = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); if (retval < 0) goto err_free_out; switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; break; default: BUG(); } retval = v9fs_fid_xattr_set(fid, name, buffer, size, 0); err_free_out: kfree(buffer); return retval; } int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid) { int retval = 0; struct posix_acl *acl; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS); if (acl) { retval = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); if (retval) return retval; set_cached_acl(inode, ACL_TYPE_ACCESS, acl); retval = v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); } return retval; } int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid, struct posix_acl *dacl, struct posix_acl *acl) { set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); set_cached_acl(inode, ACL_TYPE_ACCESS, acl); v9fs_set_acl(fid, ACL_TYPE_DEFAULT, dacl); v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl); return 0; } void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl) { posix_acl_release(dacl); posix_acl_release(acl); } int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { int retval = 0; umode_t mode = *modep; struct posix_acl *acl = NULL; if (!S_ISLNK(mode)) { acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) return PTR_ERR(acl); if (!acl) mode &= ~current_umask(); } if (acl) { if (S_ISDIR(mode)) *dpacl = posix_acl_dup(acl); retval = __posix_acl_create(&acl, GFP_NOFS, &mode); if (retval < 0) return retval; if (retval > 0) *pacl = acl; else posix_acl_release(acl); } *modep = mode; return 0; }
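/*
 * Illustrative sketch only: the posix_acl <-> xattr-blob round trip that
 * v9fs_fid_get_acl() and v9fs_set_acl() above are built around.  The
 * function name is invented, the "ship to server" step is elided, and the
 * snippet only builds in-tree.
 */
static int example_acl_roundtrip(struct posix_acl *acl)
{
	size_t size = posix_acl_xattr_size(acl->a_count);
	struct posix_acl *copy;
	void *blob;
	int ret;

	blob = kzalloc(size, GFP_KERNEL);
	if (!blob)
		return -ENOMEM;

	/* struct posix_acl -> on-the-wire xattr representation */
	ret = posix_acl_to_xattr(&init_user_ns, acl, blob, size);
	if (ret < 0)
		goto out;

	/* ... the blob is what v9fs_fid_xattr_set() sends to the server ... */

	/* xattr representation -> struct posix_acl (caller must release) */
	copy = posix_acl_from_xattr(&init_user_ns, blob, size);
	ret = PTR_ERR_OR_ZERO(copy);
	if (!ret)
		posix_acl_release(copy);
out:
	kfree(blob);
	return ret < 0 ? ret : 0;
}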
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* * Copyright (c) Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/zstd.h> #include "common/zstd_deps.h" #include "common/zstd_internal.h" #define ZSTD_FORWARD_IF_ERR(ret) \ do { \ size_t const __ret = (ret); \ if (ZSTD_isError(__ret)) \ return __ret; \ } while (0) static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters, unsigned long long pledged_src_size) { ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset( cctx, ZSTD_reset_session_and_parameters)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize( cctx, pledged_src_size)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_windowLog, parameters->cParams.windowLog)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_hashLog, parameters->cParams.hashLog)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_chainLog, parameters->cParams.chainLog)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_searchLog, parameters->cParams.searchLog)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_minMatch, parameters->cParams.minMatch)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_targetLength, parameters->cParams.targetLength)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_strategy, parameters->cParams.strategy)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag)); ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag)); return 0; } int zstd_min_clevel(void) { return ZSTD_minCLevel(); } EXPORT_SYMBOL(zstd_min_clevel); int zstd_max_clevel(void) { return ZSTD_maxCLevel(); } EXPORT_SYMBOL(zstd_max_clevel); size_t zstd_compress_bound(size_t src_size) { return ZSTD_compressBound(src_size); } EXPORT_SYMBOL(zstd_compress_bound); zstd_parameters zstd_get_params(int level, unsigned long long estimated_src_size) { return ZSTD_getParams(level, estimated_src_size, 0); } EXPORT_SYMBOL(zstd_get_params); size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) { return ZSTD_estimateCCtxSize_usingCParams(*cparams); } EXPORT_SYMBOL(zstd_cctx_workspace_bound); zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) { if (workspace == NULL) return NULL; return ZSTD_initStaticCCtx(workspace, workspace_size); } EXPORT_SYMBOL(zstd_init_cctx); size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, const void *src, size_t
src_size, const zstd_parameters *parameters) { ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size)); return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size); } EXPORT_SYMBOL(zstd_compress_cctx); size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams) { return ZSTD_estimateCStreamSize_usingCParams(*cparams); } EXPORT_SYMBOL(zstd_cstream_workspace_bound); zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, unsigned long long pledged_src_size, void *workspace, size_t workspace_size) { zstd_cstream *cstream; if (workspace == NULL) return NULL; cstream = ZSTD_initStaticCStream(workspace, workspace_size); if (cstream == NULL) return NULL; /* 0 means unknown in linux zstd API but means 0 in new zstd API */ if (pledged_src_size == 0) pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size))) return NULL; return cstream; } EXPORT_SYMBOL(zstd_init_cstream); size_t zstd_reset_cstream(zstd_cstream *cstream, unsigned long long pledged_src_size) { if (pledged_src_size == 0) pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) ); ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) ); return 0; } EXPORT_SYMBOL(zstd_reset_cstream); size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, zstd_in_buffer *input) { return ZSTD_compressStream(cstream, output, input); } EXPORT_SYMBOL(zstd_compress_stream); size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output) { return ZSTD_flushStream(cstream, output); } EXPORT_SYMBOL(zstd_flush_stream); size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output) { return ZSTD_endStream(cstream, output); } EXPORT_SYMBOL(zstd_end_stream); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Zstd Compressor");
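/*
 * Illustrative sketch only: one-shot compression through the wrappers
 * exported above, roughly as an in-kernel user (e.g. a filesystem) would do
 * it.  The function name and buffer parameters are placeholders, dst_cap is
 * assumed to have been sized with zstd_compress_bound(), and the snippet
 * only builds in-tree.
 */
static ssize_t example_zstd_oneshot(const void *src, size_t src_len,
				    void *dst, size_t dst_cap, int level)
{
	zstd_parameters params = zstd_get_params(level, src_len);
	size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
	zstd_cctx *cctx;
	void *wksp;
	size_t out;

	wksp = kvmalloc(wksp_size, GFP_KERNEL);
	if (!wksp)
		return -ENOMEM;

	cctx = zstd_init_cctx(wksp, wksp_size);
	if (!cctx) {
		kvfree(wksp);
		return -EINVAL;
	}

	out = zstd_compress_cctx(cctx, dst, dst_cap, src, src_len, &params);
	kvfree(wksp);

	/* zstd returns an error code in-band; map it to a plain errno here */
	return zstd_is_error(out) ? -EIO : out;
}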
// SPDX-License-Identifier: GPL-2.0-or-later /* * udp_diag.c Module for monitoring UDP transport protocols sockets. * * Authors: Pavel Emelyanov, <xemul@parallels.com> */ #include <linux/module.h> #include <linux/inet_diag.h> #include <linux/udp.h> #include <net/udp.h> #include <net/udplite.h> #include <linux/sock_diag.h> static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, struct nlattr *bc, bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI, net_admin); } static int udp_dump_one(struct udp_table *tbl, struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { struct sk_buff *in_skb = cb->skb; int err; struct sock *sk = NULL; struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); rcu_read_lock(); if (req->sdiag_family == AF_INET) /* src and dst are swapped for historical reasons */ sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_if, 0, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, req->id.idiag_if, 0, tbl, NULL); #endif if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); err = -ENOENT; if (!sk) goto out_nosk; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); if (err) goto out; err = -ENOMEM; rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + inet_diag_msg_attrs_size() + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; err = inet_sk_diag_fill(sk, NULL, rep, cb, req, 0, netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); out: if (sk) sock_put(sk); out_nosk: return err; } static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; struct nlattr *bc;
cb_data = cb->data; bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { struct udp_hslot *hslot = &table->hash[slot]; struct sock *sk; num = 0; if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); sk_for_each(sk, &hslot->head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) goto next; if (!(r->idiag_states & (1 << sk->sk_state))) goto next; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next; if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next; if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { spin_unlock_bh(&hslot->lock); goto done; } next: num++; } spin_unlock_bh(&hslot->lock); } done: cb->args[0] = slot; cb->args[1] = num; } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { udp_dump(sock_net(cb->skb->sk)->ipv4.udp_table, skb, cb, r); } static int udp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { return udp_dump_one(sock_net(cb->skb->sk)->ipv4.udp_table, cb, req); } static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info) { r->idiag_rqueue = udp_rqueue_get(sk); r->idiag_wqueue = sk_wmem_alloc_get(sk); } #ifdef CONFIG_INET_DIAG_DESTROY static int __udp_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req, struct udp_table *tbl) { struct net *net = sock_net(in_skb->sk); struct sock *sk; int err; rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = __udp4_lib_lookup(net, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if, 0, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) sk = __udp4_lib_lookup(net, req->id.idiag_dst[3], req->id.idiag_dport, req->id.idiag_src[3], req->id.idiag_sport, req->id.idiag_if, 0, tbl, NULL); else sk = __udp6_lib_lookup(net, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, req->id.idiag_if, 0, tbl, NULL); } #endif else { rcu_read_unlock(); return -EINVAL; } if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); if (!sk) return -ENOENT; if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { sock_put(sk); return -ENOENT; } err = sock_diag_destroy(sk, ECONNABORTED); sock_put(sk); return err; } static int udp_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table); } static int udplite_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { return __udp_diag_destroy(in_skb, req, &udplite_table); } #endif static const struct inet_diag_handler udp_diag_handler = { .owner = THIS_MODULE, .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, .idiag_info_size = 0, #ifdef CONFIG_INET_DIAG_DESTROY .destroy = udp_diag_destroy, #endif }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { udp_dump(&udplite_table, skb, cb, r); } static int udplite_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 
*req) { return udp_dump_one(&udplite_table, cb, req); } static const struct inet_diag_handler udplite_diag_handler = { .owner = THIS_MODULE, .dump = udplite_diag_dump, .dump_one = udplite_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, .idiag_info_size = 0, #ifdef CONFIG_INET_DIAG_DESTROY .destroy = udplite_diag_destroy, #endif }; static int __init udp_diag_init(void) { int err; err = inet_diag_register(&udp_diag_handler); if (err) goto out; err = inet_diag_register(&udplite_diag_handler); if (err) goto out_lite; out: return err; out_lite: inet_diag_unregister(&udp_diag_handler); goto out; } static void __exit udp_diag_exit(void) { inet_diag_unregister(&udplite_diag_handler); inet_diag_unregister(&udp_diag_handler); } module_init(udp_diag_init); module_exit(udp_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
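/*
 * Illustrative user-space sketch: the kind of SOCK_DIAG_BY_FAMILY dump
 * request that udp_diag_dump() above answers (essentially what "ss -u"
 * sends over NETLINK_SOCK_DIAG).  Reply parsing is omitted for brevity;
 * each reply message carries one inet_diag_msg per UDP socket.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

int main(void)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg = {
		.nlh = {
			.nlmsg_len   = sizeof(msg),
			.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family   = AF_INET,
			.sdiag_protocol = IPPROTO_UDP,
			.idiag_states   = -1,	/* all socket states */
		},
	};
	char buf[8192];
	ssize_t n;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	if (fd < 0 || sendto(fd, &msg, sizeof(msg), 0,
			     (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
		return 1;

	n = recv(fd, buf, sizeof(buf), 0);
	printf("received %zd bytes of UDP diag data\n", n);
	close(fd);
	return 0;
}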
// SPDX-License-Identifier: GPL-2.0-or-later /* * Module kallsyms support * * Copyright (C) 2010 Rusty Russell */ #include <linux/module.h> #include <linux/module_symbol.h> #include <linux/kallsyms.h> #include <linux/buildid.h> #include <linux/bsearch.h> #include "internal.h" /* Lookup exported symbol in given range of kernel_symbols */ static const struct kernel_symbol *lookup_exported_symbol(const char *name, const struct kernel_symbol *start, const struct kernel_symbol *stop) { return bsearch(name, start, stop - start, sizeof(struct kernel_symbol), cmp_name); } static int is_exported(const char *name, unsigned long value, const struct module *mod) { const struct kernel_symbol *ks; if (!mod) ks = lookup_exported_symbol(name, __start___ksymtab, __stop___ksymtab); else ks = lookup_exported_symbol(name, mod->syms, mod->syms + mod->num_syms); return ks && kernel_symbol_value(ks) == value; } /* As per nm */ static char elf_type(const Elf_Sym *sym, const struct load_info *info) { const Elf_Shdr *sechdrs = info->sechdrs; if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) return 'v'; else return 'w'; } if (sym->st_shndx == SHN_UNDEF) return 'U'; if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu) return 'a'; if (sym->st_shndx >= SHN_LORESERVE) return '?'; if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) return 't'; if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) return 'r'; else if
(sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) return 'g'; else return 'd'; } if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) return 's'; else return 'b'; } if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name, ".debug")) { return 'n'; } return '?'; } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, unsigned int shnum, unsigned int pcpundx) { const Elf_Shdr *sec; enum mod_mem_type type; if (src->st_shndx == SHN_UNDEF || src->st_shndx >= shnum || !src->st_name) return false; #ifdef CONFIG_KALLSYMS_ALL if (src->st_shndx == pcpundx) return true; #endif sec = sechdrs + src->st_shndx; type = sec->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; if (!(sec->sh_flags & SHF_ALLOC) #ifndef CONFIG_KALLSYMS_ALL || !(sec->sh_flags & SHF_EXECINSTR) #endif || mod_mem_type_is_init(type)) return false; return true; } /* * We only allocate and copy the strings needed by the parts of symtab * we keep. This is simple, but has the effect of making multiple * copies of duplicates. We could be more sophisticated, see * linux-kernel thread starting with * <73defb5e4bca04a6431392cc341112b1@localhost>. */ void layout_symtab(struct module *mod, struct load_info *info) { Elf_Shdr *symsect = info->sechdrs + info->index.sym; Elf_Shdr *strsect = info->sechdrs + info->index.str; const Elf_Sym *src; unsigned int i, nsrc, ndst, strtab_size = 0; struct module_memory *mod_mem_data = &mod->mem[MOD_DATA]; struct module_memory *mod_mem_init_data = &mod->mem[MOD_INIT_DATA]; /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; symsect->sh_entsize = module_get_offset_and_type(mod, MOD_INIT_DATA, symsect, info->index.sym); pr_debug("\t%s\n", info->secstrings + symsect->sh_name); src = (void *)info->hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name]) + 1; ndst++; } } /* Append room for core symbols at end of core part. */ info->symoffs = ALIGN(mod_mem_data->size, symsect->sh_addralign ?: 1); info->stroffs = mod_mem_data->size = info->symoffs + ndst * sizeof(Elf_Sym); mod_mem_data->size += strtab_size; /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */ info->core_typeoffs = mod_mem_data->size; mod_mem_data->size += ndst * sizeof(char); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = module_get_offset_and_type(mod, MOD_INIT_DATA, strsect, info->index.str); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); /* We'll tack temporary mod_kallsyms on the end. */ mod_mem_init_data->size = ALIGN(mod_mem_init_data->size, __alignof__(struct mod_kallsyms)); info->mod_kallsyms_init_off = mod_mem_init_data->size; mod_mem_init_data->size += sizeof(struct mod_kallsyms); info->init_typeoffs = mod_mem_init_data->size; mod_mem_init_data->size += nsrc * sizeof(char); } /* * We use the full symtab and strtab which layout_symtab arranged to * be appended to the init section. Later we switch to the cut-down * core-only ones. 
*/ void add_kallsyms(struct module *mod, const struct load_info *info) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; char *s; Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; unsigned long strtab_size; void *data_base = mod->mem[MOD_DATA].base; void *init_data_base = mod->mem[MOD_INIT_DATA].base; /* Set up to point into init section. */ mod->kallsyms = (void __rcu *)init_data_base + info->mod_kallsyms_init_off; rcu_read_lock(); /* The following is safe since this pointer cannot change */ rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ rcu_dereference(mod->kallsyms)->strtab = (void *)info->sechdrs[info->index.str].sh_addr; rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init * and set types up while we still have access to sections. */ mod->core_kallsyms.symtab = dst = data_base + info->symoffs; mod->core_kallsyms.strtab = s = data_base + info->stroffs; mod->core_kallsyms.typetab = data_base + info->core_typeoffs; strtab_size = info->core_typeoffs - info->stroffs; src = rcu_dereference(mod->kallsyms)->symtab; for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { ssize_t ret; mod->core_kallsyms.typetab[ndst] = rcu_dereference(mod->kallsyms)->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; ret = strscpy(s, &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], strtab_size); if (ret < 0) break; s += ret + 1; strtab_size -= ret + 1; } } rcu_read_unlock(); mod->core_kallsyms.num_symtab = ndst; } #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) void init_build_id(struct module *mod, const struct load_info *info) { const Elf_Shdr *sechdr; unsigned int i; for (i = 0; i < info->hdr->e_shnum; i++) { sechdr = &info->sechdrs[i]; if (!sect_empty(sechdr) && sechdr->sh_type == SHT_NOTE && !build_id_parse_buf((void *)sechdr->sh_addr, mod->build_id, sechdr->sh_size)) break; } } #else void init_build_id(struct module *mod, const struct load_info *info) { } #endif static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms, unsigned int symnum) { return kallsyms->strtab + kallsyms->symtab[symnum].st_name; } /* * Given a module and address, find the corresponding symbol and return its name * while providing its size and offset if needed. */ static const char *find_kallsyms_symbol(struct module *mod, unsigned long addr, unsigned long *size, unsigned long *offset) { unsigned int i, best = 0; unsigned long nextval, bestval; struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); struct module_memory *mod_mem; /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) mod_mem = &mod->mem[MOD_INIT_TEXT]; else mod_mem = &mod->mem[MOD_TEXT]; nextval = (unsigned long)mod_mem->base + mod_mem->size; bestval = kallsyms_symbol_value(&kallsyms->symtab[best]); /* * Scan for closest preceding symbol, and next symbol. (ELF * starts real symbols at 1). 
*/ for (i = 1; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; unsigned long thisval = kallsyms_symbol_value(sym); if (sym->st_shndx == SHN_UNDEF) continue; /* * We ignore unnamed symbols: they're uninformative * and inserted at a whim. */ if (*kallsyms_symbol_name(kallsyms, i) == '\0' || is_mapping_symbol(kallsyms_symbol_name(kallsyms, i))) continue; if (thisval <= addr && thisval > bestval) { best = i; bestval = thisval; } if (thisval > addr && thisval < nextval) nextval = thisval; } if (!best) return NULL; if (size) *size = nextval - bestval; if (offset) *offset = addr - bestval; return kallsyms_symbol_name(kallsyms, best); } void * __weak dereference_module_function_descriptor(struct module *mod, void *ptr) { return ptr; } /* * For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. */ const char *module_address_lookup(unsigned long addr, unsigned long *size, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { const char *ret = NULL; struct module *mod; preempt_disable(); mod = __module_address(addr); if (mod) { if (modname) *modname = mod->name; if (modbuildid) { #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) *modbuildid = mod->build_id; #else *modbuildid = NULL; #endif } ret = find_kallsyms_symbol(mod, addr, size, offset); } /* Make a copy in here where it's safe */ if (ret) { strncpy(namebuf, ret, KSYM_NAME_LEN - 1); ret = namebuf; } preempt_enable(); return ret; } int lookup_module_symbol_name(unsigned long addr, char *symname) { struct module *mod; preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; if (within_module(addr, mod)) { const char *sym; sym = find_kallsyms_symbol(mod, addr, NULL, NULL); if (!sym) goto out; strscpy(symname, sym, KSYM_NAME_LEN); preempt_enable(); return 0; } } out: preempt_enable(); return -ERANGE; } int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) { struct module *mod; preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { struct mod_kallsyms *kallsyms; if (mod->state == MODULE_STATE_UNFORMED) continue; kallsyms = rcu_dereference_sched(mod->kallsyms); if (symnum < kallsyms->num_symtab) { const Elf_Sym *sym = &kallsyms->symtab[symnum]; *value = kallsyms_symbol_value(sym); *type = kallsyms->typetab[symnum]; strscpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN); strscpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } symnum -= kallsyms->num_symtab; } preempt_enable(); return -ERANGE; } /* Given a module and name of symbol, find and return the symbol's value */ static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned int i; struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; if (strcmp(name, kallsyms_symbol_name(kallsyms, i)) == 0 && sym->st_shndx != SHN_UNDEF) return kallsyms_symbol_value(sym); } return 0; } static unsigned long __module_kallsyms_lookup_name(const char *name) { struct module *mod; char *colon; colon = strnchr(name, MODULE_NAME_LEN, ':'); if (colon) { mod = find_module_all(name, colon - name, false); if (mod) return __find_kallsyms_symbol_value(mod, colon + 1); return 0; } list_for_each_entry_rcu(mod, &modules, list) { 
unsigned long ret; if (mod->state == MODULE_STATE_UNFORMED) continue; ret = __find_kallsyms_symbol_value(mod, name); if (ret) return ret; } return 0; } /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name) { unsigned long ret; /* Don't lock: we're in enough trouble already. */ preempt_disable(); ret = __module_kallsyms_lookup_name(name); preempt_enable(); return ret; } unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned long ret; preempt_disable(); ret = __find_kallsyms_symbol_value(mod, name); preempt_enable(); return ret; } int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, unsigned long), void *data) { struct module *mod; unsigned int i; int ret = 0; mutex_lock(&module_mutex); list_for_each_entry(mod, &modules, list) { struct mod_kallsyms *kallsyms; if (mod->state == MODULE_STATE_UNFORMED) continue; if (modname && strcmp(modname, mod->name)) continue; /* Use rcu_dereference_sched() to remain compliant with the sparse tool */ preempt_disable(); kallsyms = rcu_dereference_sched(mod->kallsyms); preempt_enable(); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; if (sym->st_shndx == SHN_UNDEF) continue; ret = fn(data, kallsyms_symbol_name(kallsyms, i), kallsyms_symbol_value(sym)); if (ret != 0) goto out; } /* * The given module is found, the subsequent modules do not * need to be compared. */ if (modname) break; } out: mutex_unlock(&module_mutex); return ret; }
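/*
 * Illustrative sketch (not part of the kernel sources): the scan in
 * find_kallsyms_symbol() above keeps the highest symbol value that is still
 * <= addr ("best") and the lowest value above addr ("nextval"), so that
 * *size = nextval - bestval and *offset = addr - bestval.  All demo_* names
 * below are hypothetical; this is a stand-alone user-space rendering of the
 * same scan over a plain array.
 */
#include <stdio.h>

struct demo_sym {
	const char *name;
	unsigned long value;
};

static const struct demo_sym syms[] = {
	{ "func_a", 0x1000 },
	{ "func_b", 0x1400 },
	{ "func_c", 0x2000 },
};

static const char *demo_lookup(unsigned long addr, unsigned long end,
			       unsigned long *size, unsigned long *offset)
{
	unsigned long bestval = 0, nextval = end;
	int best = -1;
	unsigned int i;

	for (i = 0; i < sizeof(syms) / sizeof(syms[0]); i++) {
		unsigned long thisval = syms[i].value;

		/* Closest symbol at or below addr wins. */
		if (thisval <= addr && thisval > bestval) {
			best = i;
			bestval = thisval;
		}
		/* Track the next symbol above addr to derive the size. */
		if (thisval > addr && thisval < nextval)
			nextval = thisval;
	}
	if (best < 0)
		return NULL;
	if (size)
		*size = nextval - bestval;
	if (offset)
		*offset = addr - bestval;
	return syms[best].name;
}

int main(void)
{
	unsigned long size, offset;
	const char *name = demo_lookup(0x1404, 0x3000, &size, &offset);

	if (name)
		printf("%s+0x%lx/0x%lx\n", name, offset, size);	/* func_b+0x4/0xc00 */
	return 0;
}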
787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 // SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/file.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/slab.h> /* * Mapping table from "enum tomoyo_path_acl_index" to "enum tomoyo_mac_index". */ static const u8 tomoyo_p2mac[TOMOYO_MAX_PATH_OPERATION] = { [TOMOYO_TYPE_EXECUTE] = TOMOYO_MAC_FILE_EXECUTE, [TOMOYO_TYPE_READ] = TOMOYO_MAC_FILE_OPEN, [TOMOYO_TYPE_WRITE] = TOMOYO_MAC_FILE_OPEN, [TOMOYO_TYPE_APPEND] = TOMOYO_MAC_FILE_OPEN, [TOMOYO_TYPE_UNLINK] = TOMOYO_MAC_FILE_UNLINK, [TOMOYO_TYPE_GETATTR] = TOMOYO_MAC_FILE_GETATTR, [TOMOYO_TYPE_RMDIR] = TOMOYO_MAC_FILE_RMDIR, [TOMOYO_TYPE_TRUNCATE] = TOMOYO_MAC_FILE_TRUNCATE, [TOMOYO_TYPE_SYMLINK] = TOMOYO_MAC_FILE_SYMLINK, [TOMOYO_TYPE_CHROOT] = TOMOYO_MAC_FILE_CHROOT, [TOMOYO_TYPE_UMOUNT] = TOMOYO_MAC_FILE_UMOUNT, }; /* * Mapping table from "enum tomoyo_mkdev_acl_index" to "enum tomoyo_mac_index". */ const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = { [TOMOYO_TYPE_MKBLOCK] = TOMOYO_MAC_FILE_MKBLOCK, [TOMOYO_TYPE_MKCHAR] = TOMOYO_MAC_FILE_MKCHAR, }; /* * Mapping table from "enum tomoyo_path2_acl_index" to "enum tomoyo_mac_index". */ const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = { [TOMOYO_TYPE_LINK] = TOMOYO_MAC_FILE_LINK, [TOMOYO_TYPE_RENAME] = TOMOYO_MAC_FILE_RENAME, [TOMOYO_TYPE_PIVOT_ROOT] = TOMOYO_MAC_FILE_PIVOT_ROOT, }; /* * Mapping table from "enum tomoyo_path_number_acl_index" to * "enum tomoyo_mac_index". */ const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = { [TOMOYO_TYPE_CREATE] = TOMOYO_MAC_FILE_CREATE, [TOMOYO_TYPE_MKDIR] = TOMOYO_MAC_FILE_MKDIR, [TOMOYO_TYPE_MKFIFO] = TOMOYO_MAC_FILE_MKFIFO, [TOMOYO_TYPE_MKSOCK] = TOMOYO_MAC_FILE_MKSOCK, [TOMOYO_TYPE_IOCTL] = TOMOYO_MAC_FILE_IOCTL, [TOMOYO_TYPE_CHMOD] = TOMOYO_MAC_FILE_CHMOD, [TOMOYO_TYPE_CHOWN] = TOMOYO_MAC_FILE_CHOWN, [TOMOYO_TYPE_CHGRP] = TOMOYO_MAC_FILE_CHGRP, }; /** * tomoyo_put_name_union - Drop reference on "struct tomoyo_name_union". * * @ptr: Pointer to "struct tomoyo_name_union". * * Returns nothing. */ void tomoyo_put_name_union(struct tomoyo_name_union *ptr) { tomoyo_put_group(ptr->group); tomoyo_put_name(ptr->filename); } /** * tomoyo_compare_name_union - Check whether a name matches "struct tomoyo_name_union" or not. * * @name: Pointer to "struct tomoyo_path_info". * @ptr: Pointer to "struct tomoyo_name_union". * * Returns "struct tomoyo_path_info" if @name matches @ptr, NULL otherwise. 
*/ const struct tomoyo_path_info * tomoyo_compare_name_union(const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr) { if (ptr->group) return tomoyo_path_matches_group(name, ptr->group); if (tomoyo_path_matches_pattern(name, ptr->filename)) return ptr->filename; return NULL; } /** * tomoyo_put_number_union - Drop reference on "struct tomoyo_number_union". * * @ptr: Pointer to "struct tomoyo_number_union". * * Returns nothing. */ void tomoyo_put_number_union(struct tomoyo_number_union *ptr) { tomoyo_put_group(ptr->group); } /** * tomoyo_compare_number_union - Check whether a value matches "struct tomoyo_number_union" or not. * * @value: Number to check. * @ptr: Pointer to "struct tomoyo_number_union". * * Returns true if @value matches @ptr, false otherwise. */ bool tomoyo_compare_number_union(const unsigned long value, const struct tomoyo_number_union *ptr) { if (ptr->group) return tomoyo_number_matches_group(value, value, ptr->group); return value >= ptr->values[0] && value <= ptr->values[1]; } /** * tomoyo_add_slash - Add trailing '/' if needed. * * @buf: Pointer to "struct tomoyo_path_info". * * Returns nothing. * * @buf must be generated by tomoyo_encode() because this function does not * allocate memory for adding '/'. */ static void tomoyo_add_slash(struct tomoyo_path_info *buf) { if (buf->is_dir) return; /* * This is OK because tomoyo_encode() reserves space for appending "/". */ strcat((char *) buf->name, "/"); tomoyo_fill_path_info(buf); } /** * tomoyo_get_realpath - Get realpath. * * @buf: Pointer to "struct tomoyo_path_info". * @path: Pointer to "struct path". * * Returns true on success, false otherwise. */ static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, const struct path *path) { buf->name = tomoyo_realpath_from_path(path); if (buf->name) { tomoyo_fill_path_info(buf); return true; } return false; } /** * tomoyo_audit_path_log - Audit path request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_path_log(struct tomoyo_request_info *r) { return tomoyo_supervisor(r, "file %s %s\n", tomoyo_path_keyword [r->param.path.operation], r->param.path.filename->name); } /** * tomoyo_audit_path2_log - Audit path/path request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_path2_log(struct tomoyo_request_info *r) { return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords [tomoyo_pp2mac[r->param.path2.operation]], r->param.path2.filename1->name, r->param.path2.filename2->name); } /** * tomoyo_audit_mkdev_log - Audit path/number/number/number request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_mkdev_log(struct tomoyo_request_info *r) { return tomoyo_supervisor(r, "file %s %s 0%o %u %u\n", tomoyo_mac_keywords [tomoyo_pnnn2mac[r->param.mkdev.operation]], r->param.mkdev.filename->name, r->param.mkdev.mode, r->param.mkdev.major, r->param.mkdev.minor); } /** * tomoyo_audit_path_number_log - Audit path/number request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. 
*/ static int tomoyo_audit_path_number_log(struct tomoyo_request_info *r) { const u8 type = r->param.path_number.operation; u8 radix; char buffer[64]; switch (type) { case TOMOYO_TYPE_CREATE: case TOMOYO_TYPE_MKDIR: case TOMOYO_TYPE_MKFIFO: case TOMOYO_TYPE_MKSOCK: case TOMOYO_TYPE_CHMOD: radix = TOMOYO_VALUE_TYPE_OCTAL; break; case TOMOYO_TYPE_IOCTL: radix = TOMOYO_VALUE_TYPE_HEXADECIMAL; break; default: radix = TOMOYO_VALUE_TYPE_DECIMAL; break; } tomoyo_print_ulong(buffer, sizeof(buffer), r->param.path_number.number, radix); return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords [tomoyo_pn2mac[type]], r->param.path_number.filename->name, buffer); } /** * tomoyo_check_path_acl - Check permission for path operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. * * To be able to use wildcard for domain transition, this function sets * matching entry on success. Since the caller holds tomoyo_read_lock(), * it is safe to set matching entry. */ static bool tomoyo_check_path_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_path_acl *acl = container_of(ptr, typeof(*acl), head); if (acl->perm & (1 << r->param.path.operation)) { r->param.path.matched_path = tomoyo_compare_name_union(r->param.path.filename, &acl->name); return r->param.path.matched_path != NULL; } return false; } /** * tomoyo_check_path_number_acl - Check permission for path number operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_path_number_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.path_number.operation)) && tomoyo_compare_number_union(r->param.path_number.number, &acl->number) && tomoyo_compare_name_union(r->param.path_number.filename, &acl->name); } /** * tomoyo_check_path2_acl - Check permission for path path operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_path2_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.path2.operation)) && tomoyo_compare_name_union(r->param.path2.filename1, &acl->name1) && tomoyo_compare_name_union(r->param.path2.filename2, &acl->name2); } /** * tomoyo_check_mkdev_acl - Check permission for path number number number operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_mkdev_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.mkdev.operation)) && tomoyo_compare_number_union(r->param.mkdev.mode, &acl->mode) && tomoyo_compare_number_union(r->param.mkdev.major, &acl->major) && tomoyo_compare_number_union(r->param.mkdev.minor, &acl->minor) && tomoyo_compare_name_union(r->param.mkdev.filename, &acl->name); } /** * tomoyo_same_path_acl - Check for duplicated "struct tomoyo_path_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". 
* * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_path_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name, &p2->name); } /** * tomoyo_merge_path_acl - Merge duplicated "struct tomoyo_path_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u16 * const a_perm = &container_of(a, struct tomoyo_path_acl, head) ->perm; u16 perm = READ_ONCE(*a_perm); const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_path_acl - Update "struct tomoyo_path_acl" list. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_path_acl(const u16 perm, struct tomoyo_acl_param *param) { struct tomoyo_path_acl e = { .head.type = TOMOYO_TYPE_PATH_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_path_acl, tomoyo_merge_path_acl); tomoyo_put_name_union(&e.name); return error; } /** * tomoyo_same_mkdev_acl - Check for duplicated "struct tomoyo_mkdev_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_mkdev_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name, &p2->name) && tomoyo_same_number_union(&p1->mode, &p2->mode) && tomoyo_same_number_union(&p1->major, &p2->major) && tomoyo_same_number_union(&p1->minor, &p2->minor); } /** * tomoyo_merge_mkdev_acl - Merge duplicated "struct tomoyo_mkdev_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_mkdev_acl, head) ->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_mkdev_acl - Update "struct tomoyo_mkdev_acl" list. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). 
*/ static int tomoyo_update_mkdev_acl(const u8 perm, struct tomoyo_acl_param *param) { struct tomoyo_mkdev_acl e = { .head.type = TOMOYO_TYPE_MKDEV_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name) || !tomoyo_parse_number_union(param, &e.mode) || !tomoyo_parse_number_union(param, &e.major) || !tomoyo_parse_number_union(param, &e.minor)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_mkdev_acl, tomoyo_merge_mkdev_acl); tomoyo_put_name_union(&e.name); tomoyo_put_number_union(&e.mode); tomoyo_put_number_union(&e.major); tomoyo_put_number_union(&e.minor); return error; } /** * tomoyo_same_path2_acl - Check for duplicated "struct tomoyo_path2_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_path2_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path2_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path2_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name1, &p2->name1) && tomoyo_same_name_union(&p1->name2, &p2->name2); } /** * tomoyo_merge_path2_acl - Merge duplicated "struct tomoyo_path2_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_path2_acl, head) ->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_path2_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_path2_acl - Update "struct tomoyo_path2_acl" list. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_path2_acl(const u8 perm, struct tomoyo_acl_param *param) { struct tomoyo_path2_acl e = { .head.type = TOMOYO_TYPE_PATH2_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name1) || !tomoyo_parse_name_union(param, &e.name2)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_path2_acl, tomoyo_merge_path2_acl); tomoyo_put_name_union(&e.name1); tomoyo_put_name_union(&e.name2); return error; } /** * tomoyo_path_permission - Check permission for single path operation. * * @r: Pointer to "struct tomoyo_request_info". * @operation: Type of operation. * @filename: Filename to check. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, const struct tomoyo_path_info *filename) { int error; r->type = tomoyo_p2mac[operation]; r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type); if (r->mode == TOMOYO_CONFIG_DISABLED) return 0; r->param_type = TOMOYO_TYPE_PATH_ACL; r->param.path.filename = filename; r->param.path.operation = operation; do { tomoyo_check_acl(r, tomoyo_check_path_acl); error = tomoyo_audit_path_log(r); } while (error == TOMOYO_RETRY_REQUEST); return error; } /** * tomoyo_execute_permission - Check permission for execute operation. * * @r: Pointer to "struct tomoyo_request_info". 
* @filename: Filename to check. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_execute_permission(struct tomoyo_request_info *r, const struct tomoyo_path_info *filename) { /* * Unlike other permission checks, this check is done regardless of * profile mode settings in order to check for domain transition * preference. */ r->type = TOMOYO_MAC_FILE_EXECUTE; r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type); r->param_type = TOMOYO_TYPE_PATH_ACL; r->param.path.filename = filename; r->param.path.operation = TOMOYO_TYPE_EXECUTE; tomoyo_check_acl(r, tomoyo_check_path_acl); r->ee->transition = r->matched_acl && r->matched_acl->cond ? r->matched_acl->cond->transit : NULL; if (r->mode != TOMOYO_CONFIG_DISABLED) return tomoyo_audit_path_log(r); return 0; } /** * tomoyo_same_path_number_acl - Check for duplicated "struct tomoyo_path_number_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path_number_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path_number_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name, &p2->name) && tomoyo_same_number_union(&p1->number, &p2->number); } /** * tomoyo_merge_path_number_acl - Merge duplicated "struct tomoyo_path_number_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_path_number_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_path_number_acl, head) ->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_path_number_acl - Update ioctl/chmod/chown/chgrp ACL. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_update_path_number_acl(const u8 perm, struct tomoyo_acl_param *param) { struct tomoyo_path_number_acl e = { .head.type = TOMOYO_TYPE_PATH_NUMBER_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name) || !tomoyo_parse_number_union(param, &e.number)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_path_number_acl, tomoyo_merge_path_number_acl); tomoyo_put_name_union(&e.name); tomoyo_put_number_union(&e.number); return error; } /** * tomoyo_path_number_perm - Check permission for "create", "mkdir", "mkfifo", "mksock", "ioctl", "chmod", "chown", "chgrp". * * @type: Type of operation. * @path: Pointer to "struct path". * @number: Number. * * Returns 0 on success, negative value otherwise. 
*/ int tomoyo_path_number_perm(const u8 type, const struct path *path, unsigned long number) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error = -ENOMEM; struct tomoyo_path_info buf; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pn2mac[type]) == TOMOYO_CONFIG_DISABLED) return 0; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf, path)) goto out; r.obj = &obj; if (type == TOMOYO_TYPE_MKDIR) tomoyo_add_slash(&buf); r.param_type = TOMOYO_TYPE_PATH_NUMBER_ACL; r.param.path_number.operation = type; r.param.path_number.filename = &buf; r.param.path_number.number = number; do { tomoyo_check_acl(&r, tomoyo_check_path_number_acl); error = tomoyo_audit_path_number_log(&r); } while (error == TOMOYO_RETRY_REQUEST); kfree(buf.name); out: tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_check_open_permission - Check permission for "read" and "write". * * @domain: Pointer to "struct tomoyo_domain_info". * @path: Pointer to "struct path". * @flag: Flags for open(). * * Returns 0 on success, negative value otherwise. */ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, const struct path *path, const int flag) { const u8 acc_mode = ACC_MODE(flag); int error = 0; struct tomoyo_path_info buf; struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int idx; buf.name = NULL; r.mode = TOMOYO_CONFIG_DISABLED; idx = tomoyo_read_lock(); if (acc_mode && tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN) != TOMOYO_CONFIG_DISABLED) { if (!tomoyo_get_realpath(&buf, path)) { error = -ENOMEM; goto out; } r.obj = &obj; if (acc_mode & MAY_READ) error = tomoyo_path_permission(&r, TOMOYO_TYPE_READ, &buf); if (!error && (acc_mode & MAY_WRITE)) error = tomoyo_path_permission(&r, (flag & O_APPEND) ? TOMOYO_TYPE_APPEND : TOMOYO_TYPE_WRITE, &buf); } out: kfree(buf.name); tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "append", "chroot" and "unmount". * * @operation: Type of operation. * @path: Pointer to "struct path". * @target: Symlink's target if @operation is TOMOYO_TYPE_SYMLINK, * NULL otherwise. * * Returns 0 on success, negative value otherwise. 
*/ int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error; struct tomoyo_path_info buf; bool is_enforce; struct tomoyo_path_info symlink_target; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_p2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; is_enforce = (r.mode == TOMOYO_CONFIG_ENFORCING); error = -ENOMEM; buf.name = NULL; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf, path)) goto out; r.obj = &obj; switch (operation) { case TOMOYO_TYPE_RMDIR: case TOMOYO_TYPE_CHROOT: tomoyo_add_slash(&buf); break; case TOMOYO_TYPE_SYMLINK: symlink_target.name = tomoyo_encode(target); if (!symlink_target.name) goto out; tomoyo_fill_path_info(&symlink_target); obj.symlink_target = &symlink_target; break; } error = tomoyo_path_permission(&r, operation, &buf); if (operation == TOMOYO_TYPE_SYMLINK) kfree(symlink_target.name); out: kfree(buf.name); tomoyo_read_unlock(idx); if (!is_enforce) error = 0; return error; } /** * tomoyo_mkdev_perm - Check permission for "mkblock" and "mkchar". * * @operation: Type of operation. (TOMOYO_TYPE_MKCHAR or TOMOYO_TYPE_MKBLOCK) * @path: Pointer to "struct path". * @mode: Create mode. * @dev: Device number. * * Returns 0 on success, negative value otherwise. */ int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error = -ENOMEM; struct tomoyo_path_info buf; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; idx = tomoyo_read_lock(); error = -ENOMEM; if (tomoyo_get_realpath(&buf, path)) { r.obj = &obj; dev = new_decode_dev(dev); r.param_type = TOMOYO_TYPE_MKDEV_ACL; r.param.mkdev.filename = &buf; r.param.mkdev.operation = operation; r.param.mkdev.mode = mode; r.param.mkdev.major = MAJOR(dev); r.param.mkdev.minor = MINOR(dev); tomoyo_check_acl(&r, tomoyo_check_mkdev_acl); error = tomoyo_audit_mkdev_log(&r); kfree(buf.name); } tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_path2_perm - Check permission for "rename", "link" and "pivot_root". * * @operation: Type of operation. * @path1: Pointer to "struct path". * @path2: Pointer to "struct path". * * Returns 0 on success, negative value otherwise. 
*/ int tomoyo_path2_perm(const u8 operation, const struct path *path1, const struct path *path2) { int error = -ENOMEM; struct tomoyo_path_info buf1; struct tomoyo_path_info buf2; struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path1->mnt, .dentry = path1->dentry }, .path2 = { .mnt = path2->mnt, .dentry = path2->dentry } }; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; buf1.name = NULL; buf2.name = NULL; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf1, path1) || !tomoyo_get_realpath(&buf2, path2)) goto out; switch (operation) { case TOMOYO_TYPE_RENAME: case TOMOYO_TYPE_LINK: if (!d_is_dir(path1->dentry)) break; fallthrough; case TOMOYO_TYPE_PIVOT_ROOT: tomoyo_add_slash(&buf1); tomoyo_add_slash(&buf2); break; } r.obj = &obj; r.param_type = TOMOYO_TYPE_PATH2_ACL; r.param.path2.operation = operation; r.param.path2.filename1 = &buf1; r.param.path2.filename2 = &buf2; do { tomoyo_check_acl(&r, tomoyo_check_path2_acl); error = tomoyo_audit_path2_log(&r); } while (error == TOMOYO_RETRY_REQUEST); out: kfree(buf1.name); kfree(buf2.name); tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_same_mount_acl - Check for duplicated "struct tomoyo_mount_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) && tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) && tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) && tomoyo_same_number_union(&p1->flags, &p2->flags); } /** * tomoyo_update_mount_acl - Write "struct tomoyo_mount_acl" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_mount_acl(struct tomoyo_acl_param *param) { struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL }; int error; if (!tomoyo_parse_name_union(param, &e.dev_name) || !tomoyo_parse_name_union(param, &e.dir_name) || !tomoyo_parse_name_union(param, &e.fs_type) || !tomoyo_parse_number_union(param, &e.flags)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_mount_acl, NULL); tomoyo_put_name_union(&e.dev_name); tomoyo_put_name_union(&e.dir_name); tomoyo_put_name_union(&e.fs_type); tomoyo_put_number_union(&e.flags); return error; } /** * tomoyo_write_file - Update file related list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). 
*/ int tomoyo_write_file(struct tomoyo_acl_param *param) { u16 perm = 0; u8 type; const char *operation = tomoyo_read_token(param); for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_path_keyword[type])) perm |= 1 << type; if (perm) return tomoyo_update_path_acl(perm, param); for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_mac_keywords[tomoyo_pp2mac[type]])) perm |= 1 << type; if (perm) return tomoyo_update_path2_acl(perm, param); for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_mac_keywords[tomoyo_pn2mac[type]])) perm |= 1 << type; if (perm) return tomoyo_update_path_number_acl(perm, param); for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_mac_keywords[tomoyo_pnnn2mac[type]])) perm |= 1 << type; if (perm) return tomoyo_update_mkdev_acl(perm, param); if (tomoyo_permstr(operation, tomoyo_mac_keywords[TOMOYO_MAC_FILE_MOUNT])) return tomoyo_update_mount_acl(param); return -EINVAL; }
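/*
 * Illustrative sketch (not part of TOMOYO): each tomoyo_merge_*_acl() helper
 * above folds a new entry's permission bits into an existing entry, OR-ing
 * them in on add or clearing them on delete, and reports the entry as
 * removable once no bits remain.  The demo_* names and permission bits below
 * are hypothetical; only the bit manipulation is taken from the code above.
 */
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical per-operation permission bits, built as "1 << type". */
#define DEMO_PERM_READ		(1u << 0)
#define DEMO_PERM_WRITE		(1u << 1)
#define DEMO_PERM_UNLINK	(1u << 2)

/* Returns true when the merged entry carries no permissions any more. */
static bool demo_merge_perm(unsigned short *a_perm, unsigned short b_perm,
			    bool is_delete)
{
	unsigned short perm = *a_perm;

	if (is_delete)
		perm &= ~b_perm;
	else
		perm |= b_perm;
	*a_perm = perm;
	return !perm;
}

int main(void)
{
	unsigned short perm = DEMO_PERM_READ;

	demo_merge_perm(&perm, DEMO_PERM_WRITE, false);		/* grant: read|write */
	printf("perm=0x%x\n", perm);				/* perm=0x3 */
	if (demo_merge_perm(&perm, DEMO_PERM_READ | DEMO_PERM_WRITE, true))
		printf("entry now empty, would be deleted\n");
	return 0;
}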
// SPDX-License-Identifier: GPL-2.0
#ifndef IORING_CANCEL_H
#define IORING_CANCEL_H

#include <linux/io_uring_types.h>

struct io_cancel_data {
	struct io_ring_ctx *ctx;
	union {
		u64 data;
		struct file *file;
	};
	u8 opcode;
	u32 flags;
	int seq;
};

int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags);

int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned int issue_flags);

void init_hash_table(struct io_hash_table *table, unsigned size);

int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);

static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence)
{
	if ((req->flags & REQ_F_CANCEL_SEQ) && sequence == req->work.cancel_seq)
		return true;

	req->flags |= REQ_F_CANCEL_SEQ;
	req->work.cancel_seq = sequence;
	return false;
}

#endif
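/*
 * Illustrative sketch (not from the io_uring sources): io_cancel_match_sequence()
 * above only reports a match once a request has already been stamped with a
 * cancellation sequence; the first call records the sequence and returns
 * false.  The stand-alone demo_* structure below is hypothetical and merely
 * mirrors the flag-plus-sequence pair used by that inline helper.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_F_CANCEL_SEQ	(1u << 0)	/* stands in for REQ_F_CANCEL_SEQ */

struct demo_req {
	unsigned int flags;
	int cancel_seq;
};

static bool demo_cancel_match_sequence(struct demo_req *req, int sequence)
{
	if ((req->flags & DEMO_F_CANCEL_SEQ) && sequence == req->cancel_seq)
		return true;

	req->flags |= DEMO_F_CANCEL_SEQ;
	req->cancel_seq = sequence;
	return false;
}

int main(void)
{
	struct demo_req req = { 0 };

	printf("%d\n", demo_cancel_match_sequence(&req, 7));	/* 0: stamped */
	printf("%d\n", demo_cancel_match_sequence(&req, 7));	/* 1: same sequence matches */
	printf("%d\n", demo_cancel_match_sequence(&req, 9));	/* 0: re-stamped */
	return 0;
}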
// SPDX-License-Identifier: GPL-2.0
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/utsname.h>
#include <linux/kthread.h>

#include "ext4.h"

/* Checksumming functions */
static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int offset = offsetof(struct mmp_struct, mmp_checksum);
	__u32 csum;

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);

	return cpu_to_le32(csum);
}

static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
	if (!ext4_has_metadata_csum(sb))
		return 1;

	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
}

static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
	if (!ext4_has_metadata_csum(sb))
		return;

	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
}

/*
 * Write the MMP block using REQ_SYNC to try to get the block on-disk
 * faster.
 */
static int write_mmp_block_thawed(struct super_block *sb,
				  struct buffer_head *bh)
{
	struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);

	ext4_mmp_csum_set(sb, mmp);
	lock_buffer(bh);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
	wait_on_buffer(bh);
	if (unlikely(!buffer_uptodate(bh)))
		return -EIO;
	return 0;
}

static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
	int err;

	/*
	 * We protect against freezing so that we don't create dirty buffers
	 * on frozen filesystem.
	 */
	sb_start_write(sb);
	err = write_mmp_block_thawed(sb, bh);
	sb_end_write(sb);

	return err;
}

/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
*/ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ext4_fsblk_t mmp_block) { struct mmp_struct *mmp; int ret; if (*bh) clear_buffer_uptodate(*bh); /* This would be sb_bread(sb, mmp_block), except we need to be sure * that the MD RAID device cache has been bypassed, and that the read * is not blocked in the elevator. */ if (!*bh) { *bh = sb_getblk(sb, mmp_block); if (!*bh) { ret = -ENOMEM; goto warn_exit; } } lock_buffer(*bh); ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL); if (ret) goto warn_exit; mmp = (struct mmp_struct *)((*bh)->b_data); if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) { ret = -EFSCORRUPTED; goto warn_exit; } if (!ext4_mmp_csum_verify(sb, mmp)) { ret = -EFSBADCRC; goto warn_exit; } return 0; warn_exit: brelse(*bh); *bh = NULL; ext4_warning(sb, "Error %d while reading MMP block %llu", ret, mmp_block); return ret; } /* * Dump as much information as possible to help the admin. */ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, const char *function, unsigned int line, const char *msg) { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", (unsigned long long)le64_to_cpu(mmp->mmp_time), (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* * kmmpd will update the MMP sequence every s_mmp_update_interval seconds */ static int kmmpd(void *data) { struct super_block *sb = data; struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh; struct mmp_struct *mmp; ext4_fsblk_t mmp_block; u32 seq = 0; unsigned long failed_writes = 0; int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); unsigned mmp_check_interval; unsigned long last_update_time; unsigned long diff; int retval = 0; mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); /* * Start with the higher mmp_check_interval and reduce it if * the MMP block is being updated on time. */ mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); memcpy(mmp->mmp_nodename, init_utsname()->nodename, sizeof(mmp->mmp_nodename)); while (!kthread_should_stop() && !ext4_forced_shutdown(sb)) { if (!ext4_has_feature_mmp(sb)) { ext4_warning(sb, "kmmpd being stopped since MMP feature" " has been disabled."); goto wait_to_exit; } if (++seq > EXT4_MMP_SEQ_MAX) seq = 1; mmp->mmp_seq = cpu_to_le32(seq); mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); last_update_time = jiffies; retval = write_mmp_block(sb, bh); /* * Don't spew too many error messages. Print one every * (s_mmp_update_interval * 60) seconds. */ if (retval) { if ((failed_writes % 60) == 0) { ext4_error_err(sb, -retval, "Error writing to MMP block"); } failed_writes++; } diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) schedule_timeout_interruptible(mmp_update_interval * HZ - diff); /* * We need to make sure that more than mmp_check_interval * seconds have not passed since writing. If that has happened * we need to check if the MMP block is as we left it. 
*/ diff = jiffies - last_update_time; if (diff > mmp_check_interval * HZ) { struct buffer_head *bh_check = NULL; struct mmp_struct *mmp_check; retval = read_mmp_block(sb, &bh_check, mmp_block); if (retval) { ext4_error_err(sb, -retval, "error reading MMP data: %d", retval); goto wait_to_exit; } mmp_check = (struct mmp_struct *)(bh_check->b_data); if (mmp->mmp_seq != mmp_check->mmp_seq || memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, sizeof(mmp->mmp_nodename))) { dump_mmp_msg(sb, mmp_check, "Error while updating MMP info. " "The filesystem seems to have been" " multiply mounted."); ext4_error_err(sb, EBUSY, "abort"); put_bh(bh_check); retval = -EBUSY; goto wait_to_exit; } put_bh(bh_check); } /* * Adjust the mmp_check_interval depending on how much time * it took for the MMP block to be written. */ mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, EXT4_MMP_MAX_CHECK_INTERVAL), EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); } /* * Unmount seems to be clean. */ mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); retval = write_mmp_block(sb, bh); wait_to_exit: while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop()) schedule(); } set_current_state(TASK_RUNNING); return retval; } void ext4_stop_mmpd(struct ext4_sb_info *sbi) { if (sbi->s_mmp_tsk) { kthread_stop(sbi->s_mmp_tsk); brelse(sbi->s_mmp_bh); sbi->s_mmp_tsk = NULL; } } /* * Get a random new sequence number but make sure it is not greater than * EXT4_MMP_SEQ_MAX. */ static unsigned int mmp_new_seq(void) { return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1); } /* * Protect the filesystem from being mounted more than once. */ int ext4_multi_mount_protect(struct super_block *sb, ext4_fsblk_t mmp_block) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *bh = NULL; struct mmp_struct *mmp = NULL; u32 seq; unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); unsigned int wait_time = 0; int retval; if (mmp_block < le32_to_cpu(es->s_first_data_block) || mmp_block >= ext4_blocks_count(es)) { ext4_warning(sb, "Invalid MMP block in superblock"); retval = -EINVAL; goto failed; } retval = read_mmp_block(sb, &bh, mmp_block); if (retval) goto failed; mmp = (struct mmp_struct *)(bh->b_data); if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; /* * If check_interval in MMP block is larger, use that instead of * update_interval from the superblock. */ if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); seq = le32_to_cpu(mmp->mmp_seq); if (seq == EXT4_MMP_SEQ_CLEAN) goto skip; if (seq == EXT4_MMP_SEQ_FSCK) { dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); retval = -EBUSY; goto failed; } wait_time = min(mmp_check_interval * 2 + 1, mmp_check_interval + 60); /* Print MMP interval if more than 20 secs. 
*/ if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) ext4_warning(sb, "MMP interval %u higher than expected, please" " wait.\n", wait_time * 2); if (schedule_timeout_interruptible(HZ * wait_time) != 0) { ext4_warning(sb, "MMP startup interrupted, failing mount\n"); retval = -ETIMEDOUT; goto failed; } retval = read_mmp_block(sb, &bh, mmp_block); if (retval) goto failed; mmp = (struct mmp_struct *)(bh->b_data); if (seq != le32_to_cpu(mmp->mmp_seq)) { dump_mmp_msg(sb, mmp, "Device is already active on another node."); retval = -EBUSY; goto failed; } skip: /* * write a new random sequence number. */ seq = mmp_new_seq(); mmp->mmp_seq = cpu_to_le32(seq); /* * On mount / remount we are protected against fs freezing (by s_umount * semaphore) and grabbing freeze protection upsets lockdep */ retval = write_mmp_block_thawed(sb, bh); if (retval) goto failed; /* * wait for MMP interval and check mmp_seq. */ if (schedule_timeout_interruptible(HZ * wait_time) != 0) { ext4_warning(sb, "MMP startup interrupted, failing mount"); retval = -ETIMEDOUT; goto failed; } retval = read_mmp_block(sb, &bh, mmp_block); if (retval) goto failed; mmp = (struct mmp_struct *)(bh->b_data); if (seq != le32_to_cpu(mmp->mmp_seq)) { dump_mmp_msg(sb, mmp, "Device is already active on another node."); retval = -EBUSY; goto failed; } EXT4_SB(sb)->s_mmp_bh = bh; BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname), "%pg", bh->b_bdev); /* * Start a kernel thread to update the MMP block periodically. */ EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s", (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { EXT4_SB(sb)->s_mmp_tsk = NULL; ext4_warning(sb, "Unable to create kmmpd thread for %s.", sb->s_id); retval = -ENOMEM; goto failed; } return 0; failed: brelse(bh); return retval; }
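/*
 * Illustrative sketch (not part of ext4): ext4_multi_mount_protect() above
 * raises the check interval to at least the configured minimum, lets a larger
 * value stored in the MMP block override the superblock's update interval,
 * and then waits min(check * 2 + 1, check + 60) seconds between its two reads
 * of mmp_seq.  The demo_* helper below reproduces only that arithmetic; the
 * 5-second minimum is an assumed illustrative value, not quoted from ext4.h.
 */
#include <stdio.h>

#define DEMO_MMP_MIN_CHECK_INTERVAL	5U	/* assumed minimum, in seconds */

static unsigned int demo_mmp_wait_time(unsigned int update_interval,
				       unsigned int ondisk_check_interval)
{
	unsigned int check = update_interval;

	if (check < DEMO_MMP_MIN_CHECK_INTERVAL)
		check = DEMO_MMP_MIN_CHECK_INTERVAL;
	/* The larger of the superblock and MMP-block intervals wins. */
	if (ondisk_check_interval > check)
		check = ondisk_check_interval;
	return check * 2 + 1 < check + 60 ? check * 2 + 1 : check + 60;
}

int main(void)
{
	printf("%u\n", demo_mmp_wait_time(5, 0));	/* 11 seconds */
	printf("%u\n", demo_mmp_wait_time(120, 0));	/* 180 seconds */
	return 0;
}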
// SPDX-License-Identifier: GPL-2.0-only
/* Kernel module to match AH parameters. */
/* (C) 1999-2000 Yon Uriarte <yon@astaro.de> */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>

#include <linux/netfilter_ipv4/ipt_ah.h>
#include <linux/netfilter/x_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");

/* Returns 1 if the spi is matched by the range, 0 otherwise */
static inline bool
spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
{
	bool r;

	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
		 invert ? '!' : ' ', min, spi, max);
	r = (spi >= min && spi <= max) ^ invert;
	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
	return r;
}

static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	struct ip_auth_hdr _ahdr;
	const struct ip_auth_hdr *ah;
	const struct ipt_ah *ahinfo = par->matchinfo;

	/* Must not be a fragment. */
	if (par->fragoff != 0)
		return false;

	ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
	if (ah == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		pr_debug("Dropping evil AH tinygram.\n");
		par->hotdrop = true;
		return false;
	}

	return spi_match(ahinfo->spis[0], ahinfo->spis[1],
			 ntohl(ah->spi),
			 !!(ahinfo->invflags & IPT_AH_INV_SPI));
}

static int ah_mt_check(const struct xt_mtchk_param *par)
{
	const struct ipt_ah *ahinfo = par->matchinfo;

	/* Must specify no unknown invflags */
	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
		pr_debug("unknown flags %X\n", ahinfo->invflags);
		return -EINVAL;
	}
	return 0;
}

static struct xt_match ah_mt_reg __read_mostly = {
	.name		= "ah",
	.family		= NFPROTO_IPV4,
	.match		= ah_mt,
	.matchsize	= sizeof(struct ipt_ah),
	.proto		= IPPROTO_AH,
	.checkentry	= ah_mt_check,
	.me		= THIS_MODULE,
};

static int __init ah_mt_init(void)
{
	return xt_register_match(&ah_mt_reg);
}

static void __exit ah_mt_exit(void)
{
	xt_unregister_match(&ah_mt_reg);
}

module_init(ah_mt_init);
module_exit(ah_mt_exit);
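/*
 * Illustrative sketch (not part of the xtables module): spi_match() above is
 * a plain inclusive range test whose result can be flipped by the "!" invert
 * flag via XOR.  The demo_* function below is a hypothetical user-space
 * rendering of the same predicate with a few sample SPIs.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool demo_spi_match(uint32_t min, uint32_t max, uint32_t spi, bool invert)
{
	/* In-range result XOR-ed with the invert flag, as in the module. */
	return (spi >= min && spi <= max) ^ invert;
}

int main(void)
{
	printf("%d\n", demo_spi_match(0x100, 0x1ff, 0x150, false));	/* 1: in range */
	printf("%d\n", demo_spi_match(0x100, 0x1ff, 0x150, true));	/* 0: inverted */
	printf("%d\n", demo_spi_match(0x100, 0x1ff, 0x250, true));	/* 1: out of range, inverted */
	return 0;
}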
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * netprio_cgroup.h		Control Group Priority set
 *
 * Authors:	Neil Horman <nhorman@tuxdriver.com>
 */

#ifndef _NETPRIO_CGROUP_H
#define _NETPRIO_CGROUP_H

#include <linux/cgroup.h>
#include <linux/hardirq.h>
#include <linux/rcupdate.h>

#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map {
	struct rcu_head rcu;
	u32 priomap_len;
	u32 priomap[];
};

static inline u32 task_netprioidx(struct task_struct *p)
{
	struct cgroup_subsys_state *css;
	u32 idx;

	rcu_read_lock();
	css = task_css(p, net_prio_cgrp_id);
	idx = css->id;
	rcu_read_unlock();
	return idx;
}

static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd)
{
	if (in_interrupt())
		return;

	sock_cgroup_set_prioidx(skcd, task_netprioidx(current));
}
#else /* !CONFIG_CGROUP_NET_PRIO */
static inline u32 task_netprioidx(struct task_struct *p)
{
	return 0;
}

static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd)
{
}
#endif /* CONFIG_CGROUP_NET_PRIO */
#endif /* _NETPRIO_CGROUP_H */
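/*
 * Illustrative sketch (not kernel code): struct netprio_map above keeps its
 * per-ifindex priorities in a trailing flexible array bounded by priomap_len.
 * The hypothetical demo_* structure below shows the same pattern in user
 * space, with malloc()/calloc() standing in for the kernel allocator.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct demo_priomap {
	uint32_t priomap_len;
	uint32_t priomap[];	/* one priority per interface index */
};

static uint32_t demo_prio_of(const struct demo_priomap *map, uint32_t ifindex)
{
	/* Indexes beyond the allocated map fall back to 0 in this demo. */
	return ifindex < map->priomap_len ? map->priomap[ifindex] : 0;
}

int main(void)
{
	uint32_t len = 4;
	struct demo_priomap *map = calloc(1, sizeof(*map) + len * sizeof(uint32_t));

	if (!map)
		return 1;
	map->priomap_len = len;
	map->priomap[2] = 6;	/* e.g. ifindex 2 gets priority 6 */
	printf("%u %u\n", (unsigned)demo_prio_of(map, 2),
	       (unsigned)demo_prio_of(map, 9));	/* 6 0 */
	free(map);
	return 0;
}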
11 11 11 10 11 15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 /* * linux/fs/nls/mac-cyrillic.c * * Charset maccyrillic translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ /* * COPYRIGHT AND PERMISSION NOTICE * * Copyright 1991-2012 Unicode, Inc. All rights reserved. Distributed under * the Terms of Use in http://www.unicode.org/copyright.html. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of the Unicode data files and any associated documentation (the "Data * Files") or Unicode software and any associated documentation (the * "Software") to deal in the Data Files or Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, and/or sell copies of the Data Files or Software, and * to permit persons to whom the Data Files or Software are furnished to do * so, provided that (a) the above copyright notice(s) and this permission * notice appear with all copies of the Data Files or Software, (b) both the * above copyright notice(s) and this permission notice appear in associated * documentation, and (c) there is clear notice in each modified Data File or * in the Software as well as in the documentation associated with the Data * File(s) or Software that the data or software has been modified. * * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF * THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THE DATA FILES OR SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or other * dealings in these Data Files or Software without prior written * authorization of the copyright holder. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, /* 0x90 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, /* 0xa0 */ 0x2020, 0x00b0, 0x0490, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x0406, 0x00ae, 0x00a9, 0x2122, 0x0402, 0x0452, 0x2260, 0x0403, 0x0453, /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x0456, 0x00b5, 0x0491, 0x0408, 0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040a, 0x045a, /* 0xc0 */ 0x0458, 0x0405, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 0x00bb, 0x2026, 0x00a0, 0x040b, 0x045b, 0x040c, 0x045c, 0x0455, /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x201e, 0x040e, 0x045e, 0x040f, 0x045f, 0x2116, 0x0401, 0x0451, 0x044f, /* 0xe0 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, /* 0xf0 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x20ac, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 
0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xca, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0xa4, /* 0xa0-0xa7 */ 0x00, 0xa9, 0x00, 0xc7, 0xc2, 0x00, 0xa8, 0x00, /* 0xa8-0xaf */ 0xa1, 0xb1, 0x00, 0x00, 0x00, 0xb5, 0xa6, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd6, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page04[256] = { 0x00, 0xdd, 0xab, 0xae, 0xb8, 0xc1, 0xa7, 0xba, /* 0x00-0x07 */ 0xb7, 0xbc, 0xbe, 0xcb, 0xcd, 0x00, 0xd8, 0xda, /* 0x08-0x0f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x10-0x17 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x18-0x1f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x20-0x27 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x28-0x2f */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0x30-0x37 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0x38-0x3f */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0x40-0x47 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* 0x48-0x4f */ 0x00, 0xde, 0xac, 0xaf, 0xb9, 0xcf, 0xb4, 0xbb, /* 0x50-0x57 */ 0xc0, 0xbd, 0xbf, 0xcc, 0xce, 0x00, 0xd9, 0xdb, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xa2, 0xb6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0xd0, 0xd1, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd4, 0xd5, 0x00, 0x00, 0xd2, 0xd3, 0xd7, 0x00, /* 0x18-0x1f */ 0xa0, 0x00, 0xa5, 0x00, 0x00, 0x00, 0xc9, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, 0xb0, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xc5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xad, 0x00, 0x00, 0x00, 0xb2, 0xb3, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, NULL, NULL, page04, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, page21, page22, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "maccyrillic", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_maccyrillic(void) { return register_nls(&table); } static void __exit exit_nls_maccyrillic(void) { unregister_nls(&table); } module_init(init_nls_maccyrillic) module_exit(exit_nls_maccyrillic) MODULE_LICENSE("Dual BSD/GPL");
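/*
 * Editorial sketch (not part of the kernel sources above): nls/mac-cyrillic.c
 * converts in both directions with table lookups only.  char2uni() is a
 * single index into charset2uni[]; uni2char() splits the 16-bit code point
 * into a high byte that selects a page in page_uni2charset[] and a low byte
 * that indexes within that page, treating a zero entry as "no mapping".
 * The stand-alone user-space program below mirrors that two-level lookup
 * with a four-entry excerpt of the tables (U+0410..U+0413 <-> 0x80..0x83);
 * all demo_* names are hypothetical and exist only for illustration.
 */
#include <stdio.h>

/* Excerpt of charset2uni[]: MacCyrillic 0x80..0x83 map to U+0410..U+0413 */
static const unsigned short demo_charset2uni[4] = { 0x0410, 0x0411, 0x0412, 0x0413 };

/* Excerpt of page04[]: low bytes 0x10..0x13 map back to MacCyrillic 0x80..0x83 */
static const unsigned char demo_page04[256] = {
	[0x10] = 0x80, [0x11] = 0x81, [0x12] = 0x82, [0x13] = 0x83,
};

/* Mirrors uni2char(): the high byte picks the page, the low byte indexes into it */
static int demo_uni2char(unsigned short uni, unsigned char *out)
{
	unsigned char cl = uni & 0x00ff;
	unsigned char ch = uni >> 8;
	const unsigned char *page = (ch == 0x04) ? demo_page04 : NULL;

	if (page && page[cl]) {
		*out = page[cl];
		return 1;
	}
	return -1;	/* no mapping; the kernel helper returns -EINVAL here */
}

int main(void)
{
	unsigned char c;

	if (demo_uni2char(0x0410, &c) == 1)	/* U+0410, CYRILLIC CAPITAL LETTER A */
		printf("U+0410 -> 0x%02x -> U+%04x\n", c, demo_charset2uni[c - 0x80]);
	return 0;
}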
// SPDX-License-Identifier: GPL-2.0-only /* * (C) 2007 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/gen_stats.h> #include <linux/jhash.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/slab.h> #include <net/gen_stats.h> #include <net/netlink.h> #include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_RATEEST.h> #include <net/netfilter/xt_rateest.h> #define RATEEST_HSIZE 16 struct xt_rateest_net { struct mutex hash_lock; struct hlist_head hash[RATEEST_HSIZE]; }; static unsigned int xt_rateest_id; static unsigned int jhash_rnd __read_mostly; static unsigned int xt_rateest_hash(const char *name) { return jhash(name, sizeof_field(struct xt_rateest, name), jhash_rnd) & (RATEEST_HSIZE - 1); } static void xt_rateest_hash_insert(struct xt_rateest_net *xn, struct xt_rateest *est) { unsigned int h; h = xt_rateest_hash(est->name); hlist_add_head(&est->list, &xn->hash[h]); } static struct xt_rateest *__xt_rateest_lookup(struct xt_rateest_net *xn, const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); hlist_for_each_entry(est, &xn->hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; return est; } } return NULL; } struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); struct xt_rateest *est; mutex_lock(&xn->hash_lock); est = __xt_rateest_lookup(xn, name); mutex_unlock(&xn->hash_lock); return est; } EXPORT_SYMBOL_GPL(xt_rateest_lookup); void xt_rateest_put(struct net *net, struct xt_rateest *est) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); mutex_lock(&xn->hash_lock); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->rate_est); /* * gen_estimator est_timer() might access est->lock or bstats, * wait a RCU grace period before freeing 'est' */ kfree_rcu(est, rcu); } mutex_unlock(&xn->hash_lock); } EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic_sync *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); u64_stats_add(&stats->bytes, skb->len); u64_stats_inc(&stats->packets); spin_unlock_bh(&info->est->lock); return XT_CONTINUE; } static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id); struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; struct { struct nlattr opt; struct gnet_estimator est; } cfg; int
ret; if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name)) return -ENAMETOOLONG; net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); mutex_lock(&xn->hash_lock); est = __xt_rateest_lookup(xn, info->name); if (est) { mutex_unlock(&xn->hash_lock); /* * If estimator parameters are specified, they must match the * existing estimator. */ if ((!info->interval && !info->ewma_log) || (info->interval != est->params.interval || info->ewma_log != est->params.ewma_log)) { xt_rateest_put(par->net, est); return -EINVAL; } info->est = est; return 0; } ret = -ENOMEM; est = kzalloc(sizeof(*est), GFP_KERNEL); if (!est) goto err1; gnet_stats_basic_sync_init(&est->bstats); strscpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; est->params.interval = info->interval; est->params.ewma_log = info->ewma_log; cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est)); cfg.opt.nla_type = TCA_STATS_RATE_EST; cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, &est->lock, NULL, &cfg.opt); if (ret < 0) goto err2; info->est = est; xt_rateest_hash_insert(xn, est); mutex_unlock(&xn->hash_lock); return 0; err2: kfree(est); err1: mutex_unlock(&xn->hash_lock); return ret; } static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { struct xt_rateest_target_info *info = par->targinfo; xt_rateest_put(par->net, info->est); } static struct xt_target xt_rateest_tg_reg __read_mostly = { .name = "RATEEST", .revision = 0, .family = NFPROTO_UNSPEC, .target = xt_rateest_tg, .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }; static __net_init int xt_rateest_net_init(struct net *net) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); int i; mutex_init(&xn->hash_lock); for (i = 0; i < ARRAY_SIZE(xn->hash); i++) INIT_HLIST_HEAD(&xn->hash[i]); return 0; } static struct pernet_operations xt_rateest_net_ops = { .init = xt_rateest_net_init, .id = &xt_rateest_id, .size = sizeof(struct xt_rateest_net), }; static int __init xt_rateest_tg_init(void) { int err = register_pernet_subsys(&xt_rateest_net_ops); if (err) return err; return xt_register_target(&xt_rateest_tg_reg); } static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); unregister_pernet_subsys(&xt_rateest_net_ops); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: packet rate estimator"); MODULE_ALIAS("ipt_RATEEST"); MODULE_ALIAS("ip6t_RATEEST"); module_init(xt_rateest_tg_init); module_exit(xt_rateest_tg_fini);
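/*
 * Editorial sketch (not part of the kernel sources above): xt_RATEEST keeps
 * one refcounted struct xt_rateest per estimator name in a 16-bucket hash
 * table guarded by a per-netns mutex.  Lookup hashes the name, walks the
 * bucket and takes a reference; xt_rateest_put() drops the reference and
 * frees the entry once the count reaches zero.  The stand-alone user-space
 * program below mirrors only that lookup/create/put pattern: a trivial
 * string hash stands in for jhash(), there is no locking and no rate
 * estimator core, and every demo_* name is hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_HSIZE 16	/* same bucket count as RATEEST_HSIZE */

struct demo_est {
	char name[16];
	unsigned int refcnt;
	struct demo_est *next;
};

static struct demo_est *demo_hash[DEMO_HSIZE];

static unsigned int demo_hashfn(const char *name)
{
	unsigned int h = 0;

	while (*name)
		h = h * 31 + (unsigned char)*name++;
	return h & (DEMO_HSIZE - 1);
}

/* Mirrors __xt_rateest_lookup(): find by name and take a reference */
static struct demo_est *demo_lookup(const char *name)
{
	struct demo_est *est;

	for (est = demo_hash[demo_hashfn(name)]; est; est = est->next) {
		if (strcmp(est->name, name) == 0) {
			est->refcnt++;
			return est;
		}
	}
	return NULL;
}

/* Mirrors the "reuse an existing estimator or allocate a new one" checkentry path */
static struct demo_est *demo_get_or_create(const char *name)
{
	struct demo_est *est = demo_lookup(name);
	unsigned int h;

	if (est)
		return est;
	est = calloc(1, sizeof(*est));
	if (!est)
		return NULL;
	snprintf(est->name, sizeof(est->name), "%s", name);
	est->refcnt = 1;
	h = demo_hashfn(name);
	est->next = demo_hash[h];
	demo_hash[h] = est;
	return est;
}

/* Mirrors xt_rateest_put(): drop a reference, unlink and free on the last put */
static void demo_put(struct demo_est *est)
{
	struct demo_est **p;

	if (--est->refcnt)
		return;
	p = &demo_hash[demo_hashfn(est->name)];
	while (*p != est)
		p = &(*p)->next;
	*p = est->next;
	free(est);
}

int main(void)
{
	struct demo_est *a = demo_get_or_create("uplink");
	struct demo_est *b = demo_get_or_create("uplink");	/* same object, refcnt becomes 2 */

	printf("%s refcnt=%u shared=%d\n", a->name, a->refcnt, a == b);
	demo_put(b);
	demo_put(a);
	return 0;
}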
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/include/linux/jbd2.h * * Written by Stephen C. Tweedie <sct@redhat.com> * * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved * * Definitions for transaction data structures for the buffer cache * filesystem journaling support. */ #ifndef _LINUX_JBD2_H #define _LINUX_JBD2_H /* Allow this file to be included directly into e2fsprogs */ #ifndef __KERNEL__ #include "jfs_compat.h" #define JBD2_DEBUG #else #include <linux/types.h> #include <linux/buffer_head.h> #include <linux/journal-head.h> #include <linux/stddef.h> #include <linux/mutex.h> #include <linux/timer.h> #include <linux/slab.h> #include <linux/bit_spinlock.h> #include <linux/blkdev.h> #include <crypto/hash.h> #endif #define journal_oom_retry 1 /* * Define JBD2_PARANOID_IOFAIL to cause a kernel BUG() if ext4 finds * certain classes of error which can occur due to failed IOs. Under * normal use we want ext4 to continue after such errors, because * hardware _can_ fail, but for debugging purposes when running tests on * known-good hardware we may want to trap these errors. */ #undef JBD2_PARANOID_IOFAIL /* * The default maximum commit age, in seconds. */ #define JBD2_DEFAULT_MAX_COMMIT_AGE 5 #ifdef CONFIG_JBD2_DEBUG /* * Define JBD2_EXPENSIVE_CHECKING to enable more expensive internal * consistency checks. By default we don't do this unless * CONFIG_JBD2_DEBUG is on. */ #define JBD2_EXPENSIVE_CHECKING void __jbd2_debug(int level, const char *file, const char *func, unsigned int line, const char *fmt, ...); #define jbd2_debug(n, fmt, a...) \ __jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a) #else #define jbd2_debug(n, fmt, a...) no_printk(fmt, ##a) #endif extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 #define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256 #ifdef __KERNEL__ /** * typedef handle_t - The handle_t type represents a single atomic update being performed by some process. * * All filesystem modifications made by the process go * through this handle. Recursive operations (such as quota operations) * are gathered into a single update. * * The buffer credits field is used to account for journaled buffers * being modified by the running process.
To ensure that there is * enough log space for all outstanding operations, we need to limit the * number of outstanding buffers possible at any time. When the * operation completes, any buffer credits not used are credited back to * the transaction, so that at all times we know how many buffers the * outstanding updates on a transaction might possibly touch. * * This is an opaque datatype. **/ typedef struct jbd2_journal_handle handle_t; /* Atomic operation type */ /** * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem. * * journal_t is linked to from the fs superblock structure. * * We use the journal_t to keep track of all outstanding transaction * activity on the filesystem, and to manage the state of the log * writing process. * * This is an opaque datatype. **/ typedef struct journal_s journal_t; /* Journal control structure */ #endif /* * Internal structures used by the logging mechanism: */ #define JBD2_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ /* * On-disk structures */ /* * Descriptor block types: */ #define JBD2_DESCRIPTOR_BLOCK 1 #define JBD2_COMMIT_BLOCK 2 #define JBD2_SUPERBLOCK_V1 3 #define JBD2_SUPERBLOCK_V2 4 #define JBD2_REVOKE_BLOCK 5 /* * Standard header for all descriptor blocks: */ typedef struct journal_header_s { __be32 h_magic; __be32 h_blocktype; __be32 h_sequence; } journal_header_t; /* * Checksum types. */ #define JBD2_CRC32_CHKSUM 1 #define JBD2_MD5_CHKSUM 2 #define JBD2_SHA1_CHKSUM 3 #define JBD2_CRC32C_CHKSUM 4 #define JBD2_CRC32_CHKSUM_SIZE 4 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) /* * Commit block header for storing transactional checksums: * * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum* * fields are used to store a checksum of the descriptor and data blocks. * * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum * field is used to store crc32c(uuid+commit_block). Each journal metadata * block gets its own checksum, and data block checksums are stored in * journal_block_tag (in the descriptor). The other h_chksum* fields are * not used. * * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses * journal_block_tag3_t to store a full 32-bit checksum. Everything else * is the same as v2. * * Checksum v1, v2, and v3 are mutually exclusive features. */ struct commit_header { __be32 h_magic; __be32 h_blocktype; __be32 h_sequence; unsigned char h_chksum_type; unsigned char h_chksum_size; unsigned char h_padding[2]; __be32 h_chksum[JBD2_CHECKSUM_BYTES]; __be64 h_commit_sec; __be32 h_commit_nsec; }; /* * The block tag: used to describe a single buffer in the journal. * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this * raw struct shouldn't be used for pointer math or sizeof() - use * journal_tag_bytes(journal) instead to compute this. */ typedef struct journal_block_tag3_s { __be32 t_blocknr; /* The on-disk block number */ __be32 t_flags; /* See below */ __be32 t_blocknr_high; /* most-significant high 32bits. */ __be32 t_checksum; /* crc32c(uuid+seq+block) */ } journal_block_tag3_t; typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ __be16 t_checksum; /* truncated crc32c(uuid+seq+block) */ __be16 t_flags; /* See below */ __be32 t_blocknr_high; /* most-significant high 32bits. 
*/ } journal_block_tag_t; /* Tail of descriptor or revoke block, for checksumming */ struct jbd2_journal_block_tail { __be32 t_checksum; /* crc32c(uuid+descr_block) */ }; /* * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log */ typedef struct jbd2_journal_revoke_header_s { journal_header_t r_header; __be32 r_count; /* Count of bytes used in the block */ } jbd2_journal_revoke_header_t; /* Definitions for the journal tag flags word: */ #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ #define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */ #define JBD2_FLAG_DELETED 4 /* block deleted by this transaction */ #define JBD2_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ /* * The journal superblock. All fields are in big-endian byte order. */ typedef struct journal_superblock_s { /* 0x0000 */ journal_header_t s_header; /* 0x000C */ /* Static information describing the journal */ __be32 s_blocksize; /* journal device blocksize */ __be32 s_maxlen; /* total blocks in journal file */ __be32 s_first; /* first block of log information */ /* 0x0018 */ /* Dynamic information describing the current state of the log */ __be32 s_sequence; /* first commit ID expected in log */ __be32 s_start; /* blocknr of start of log */ /* 0x0020 */ /* Error value, as set by jbd2_journal_abort(). */ __be32 s_errno; /* 0x0024 */ /* Remaining fields are only valid in a version-2 superblock */ __be32 s_feature_compat; /* compatible feature set */ __be32 s_feature_incompat; /* incompatible feature set */ __be32 s_feature_ro_compat; /* readonly-compatible feature set */ /* 0x0030 */ __u8 s_uuid[16]; /* 128-bit uuid for journal */ /* 0x0040 */ __be32 s_nr_users; /* Nr of filesystems sharing log */ __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ /* 0x0048 */ __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ __be32 s_max_trans_data; /* Limit of data blocks per trans. 
*/ /* 0x0050 */ __u8 s_checksum_type; /* checksum type */ __u8 s_padding2[3]; /* 0x0054 */ __be32 s_num_fc_blks; /* Number of fast commit blocks */ __be32 s_head; /* blocknr of head of log, only uptodate * while the filesystem is clean */ /* 0x005C */ __u32 s_padding[40]; __be32 s_checksum; /* crc32c(superblock) */ /* 0x0100 */ __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ /* 0x0400 */ } journal_superblock_t; #define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 #define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020 /* See "journal feature predicate functions" below */ /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ JBD2_FEATURE_INCOMPAT_CSUM_V3 | \ JBD2_FEATURE_INCOMPAT_FAST_COMMIT) #ifdef __KERNEL__ #include <linux/fs.h> #include <linux/sched.h> enum jbd_state_bits { BH_JBD /* Has an attached ext3 journal_head */ = BH_PrivateStart, BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ BH_Freed, /* Has been freed (truncated) */ BH_Revoked, /* Has been revoked from the log */ BH_RevokeValid, /* Revoked flag is valid */ BH_JBDDirty, /* Is dirty but journaled */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Shadow, /* IO on shadow buffer is running */ BH_Verified, /* Metadata block has been verified ok */ BH_JBDPrivateStart, /* First bit available for private use by FS */ }; BUFFER_FNS(JBD, jbd) BUFFER_FNS(JWrite, jwrite) BUFFER_FNS(JBDDirty, jbddirty) TAS_BUFFER_FNS(JBDDirty, jbddirty) BUFFER_FNS(Revoked, revoked) TAS_BUFFER_FNS(Revoked, revoked) BUFFER_FNS(RevokeValid, revokevalid) TAS_BUFFER_FNS(RevokeValid, revokevalid) BUFFER_FNS(Freed, freed) BUFFER_FNS(Shadow, shadow) BUFFER_FNS(Verified, verified) static inline struct buffer_head *jh2bh(struct journal_head *jh) { return jh->b_bh; } static inline struct journal_head *bh2jh(struct buffer_head *bh) { return bh->b_private; } static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) { bit_spin_lock(BH_JournalHead, &bh->b_state); } static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) { bit_spin_unlock(BH_JournalHead, &bh->b_state); } #define J_ASSERT(assert) BUG_ON(!(assert)) #define J_ASSERT_BH(bh, expr) J_ASSERT(expr) #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) #if defined(JBD2_PARANOID_IOFAIL) #define J_EXPECT(expr, why...) J_ASSERT(expr) #define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) #define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) #else #define __journal_expect(expr, why...) \ ({ \ int val = (expr); \ if (!val) { \ printk(KERN_ERR \ "JBD2 unexpected failure: %s: %s;\n", \ __func__, #expr); \ printk(KERN_ERR why "\n"); \ } \ val; \ }) #define J_EXPECT(expr, why...) __journal_expect(expr, ## why) #define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) #endif /* Flags in jbd_inode->i_flags */ #define __JI_COMMIT_RUNNING 0 #define __JI_WRITE_DATA 1 #define __JI_WAIT_DATA 2 /* * Commit of the inode data in progress. 
We use this flag to protect us from * concurrent deletion of inode. We cannot use reference to inode for this * since we cannot afford doing last iput() on behalf of kjournald */ #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) /* Write allocated dirty buffers in this inode before commit */ #define JI_WRITE_DATA (1 << __JI_WRITE_DATA) /* Wait for outstanding data writes for this inode before commit */ #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { /** * @i_transaction: * * Which transaction does this inode belong to? Either the running * transaction or the committing one. [j_list_lock] */ transaction_t *i_transaction; /** * @i_next_transaction: * * Pointer to the running transaction modifying inode's data in case * there is already a committing transaction touching it. [j_list_lock] */ transaction_t *i_next_transaction; /** * @i_list: List of inodes in the i_transaction [j_list_lock] */ struct list_head i_list; /** * @i_vfs_inode: * * VFS inode this inode belongs to [constant for lifetime of structure] */ struct inode *i_vfs_inode; /** * @i_flags: Flags of inode [j_list_lock] */ unsigned long i_flags; /** * @i_dirty_start: * * Offset in bytes where the dirty range for this inode starts. * [j_list_lock] */ loff_t i_dirty_start; /** * @i_dirty_end: * * Inclusive offset in bytes where the dirty range for this inode * ends. [j_list_lock] */ loff_t i_dirty_end; }; struct jbd2_revoke_table_s; /** * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete * type associated with handle_t. * @h_transaction: Which compound transaction is this update a part of? * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_total_credits: Number of remaining buffers we are allowed to add to * journal. These are dirty buffers and revoke descriptor blocks. * @h_revoke_credits: Number of remaining revoke records available for handle * @h_ref: Reference count on this handle. * @h_err: Field for caller's use to track errors through large fs operations. * @h_sync: Flag for sync-on-close. * @h_jdata: Flag to force data journaling. * @h_reserved: Flag for handle for reserved credits. * @h_aborted: Flag indicating fatal error on handle. * @h_type: For handle statistics. * @h_line_no: For handle statistics. * @h_start_jiffies: Handle Start time. * @h_requested_credits: Holds @h_total_credits after handle is started. * @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started. * @saved_alloc_context: Saved context while transaction is open. **/ /* Docbook can't yet cope with the bit fields, but will leave the documentation * in so it can be fixed later. 
*/ struct jbd2_journal_handle { union { transaction_t *h_transaction; /* Which journal handle belongs to - used iff h_reserved set */ journal_t *h_journal; }; handle_t *h_rsv_handle; int h_total_credits; int h_revoke_credits; int h_revoke_credits_requested; int h_ref; int h_err; /* Flags [no locking] */ unsigned int h_sync: 1; unsigned int h_jdata: 1; unsigned int h_reserved: 1; unsigned int h_aborted: 1; unsigned int h_type: 8; unsigned int h_line_no: 16; unsigned long h_start_jiffies; unsigned int h_requested_credits; unsigned int saved_alloc_context; }; /* * Some stats for checkpoint phase */ struct transaction_chp_stats_s { unsigned long cs_chp_time; __u32 cs_forced_to_close; __u32 cs_written; __u32 cs_dropped; }; /* The transaction_t type is the guts of the journaling mechanism. It * tracks a compound transaction through its various states: * * RUNNING: accepting new updates * LOCKED: Updates still running but we don't accept new ones * RUNDOWN: Updates are tidying up but have finished requesting * new buffers to modify (state not used for now) * FLUSH: All updates complete, but we are still writing to disk * COMMIT: All data on disk, writing commit record * FINISHED: We still have to keep the transaction for checkpointing. * * The transaction keeps track of all of the buffers modified by a * running transaction, and all of the buffers committed but not yet * flushed to home for finished transactions. * (Locking Documentation improved by LockDoc) */ /* * Lock ranking: * * j_list_lock * ->jbd_lock_bh_journal_head() (This is "innermost") * * j_state_lock * ->b_state_lock * * b_state_lock * ->j_list_lock * * j_state_lock * ->j_list_lock (journal_unmap_buffer) * */ struct transaction_s { /* Pointer to the journal for this transaction. [no locking] */ journal_t *t_journal; /* Sequence number for this transaction [no locking] */ tid_t t_tid; /* * Transaction's current state * [no locking - only kjournald2 alters this] * [j_list_lock] guards transition of a transaction into T_FINISHED * state and subsequent call of __jbd2_journal_drop_transaction() * FIXME: needs barriers * KLUDGE: [use j_state_lock] */ enum { T_RUNNING, T_LOCKED, T_SWITCH, T_FLUSH, T_COMMIT, T_COMMIT_DFLUSH, T_COMMIT_JFLUSH, T_COMMIT_CALLBACK, T_FINISHED } t_state; /* * Where in the log does this transaction's commit start? [no locking] */ unsigned long t_log_start; /* * Number of buffers on the t_buffers list [j_list_lock, no locks * needed for jbd2 thread] */ int t_nr_buffers; /* * Doubly-linked circular list of all buffers reserved but not yet * modified by this transaction [j_list_lock, no locks needed fo * jbd2 thread] */ struct journal_head *t_reserved_list; /* * Doubly-linked circular list of all metadata buffers owned by this * transaction [j_list_lock, no locks needed for jbd2 thread] */ struct journal_head *t_buffers; /* * Doubly-linked circular list of all forget buffers (superseded * buffers which we can un-checkpoint once this transaction commits) * [j_list_lock] */ struct journal_head *t_forget; /* * Doubly-linked circular list of all buffers still to be flushed before * this transaction can be checkpointed. [j_list_lock] */ struct journal_head *t_checkpoint_list; /* * Doubly-linked circular list of metadata buffers being * shadowed by log IO. The IO buffers on the iobuf list and * the shadow buffers on this list match each other one for * one at all times. 
[j_list_lock, no locks needed for jbd2 * thread] */ struct journal_head *t_shadow_list; /* * List of inodes associated with the transaction; e.g., ext4 uses * this to track inodes in data=ordered and data=journal mode that * need special handling on transaction commit; also used by ocfs2. * [j_list_lock] */ struct list_head t_inode_list; /* * Longest time some handle had to wait for running transaction */ unsigned long t_max_wait; /* * When transaction started */ unsigned long t_start; /* * When commit was requested [j_state_lock] */ unsigned long t_requested; /* * Checkpointing stats [j_list_lock] */ struct transaction_chp_stats_s t_chp_stats; /* * Number of outstanding updates running on this transaction * [none] */ atomic_t t_updates; /* * Number of blocks reserved for this transaction in the journal. * This is including all credits reserved when starting transaction * handles as well as all journal descriptor blocks needed for this * transaction. [none] */ atomic_t t_outstanding_credits; /* * Number of revoke records for this transaction added by already * stopped handles. [none] */ atomic_t t_outstanding_revokes; /* * How many handles used this transaction? [none] */ atomic_t t_handle_count; /* * Forward and backward links for the circular list of all transactions * awaiting checkpoint. [j_list_lock] */ transaction_t *t_cpnext, *t_cpprev; /* * When will the transaction expire (become due for commit), in jiffies? * [no locking] */ unsigned long t_expires; /* * When this transaction started, in nanoseconds [no locking] */ ktime_t t_start_time; /* * This transaction is being forced and some process is * waiting for it to finish. */ unsigned int t_synchronous_commit:1; /* Disk flush needs to be sent to fs partition [no locking] */ int t_need_data_flush; /* * For use by the filesystem to store fs-specific data * structures associated with the transaction */ struct list_head t_private_list; }; struct transaction_run_stats_s { unsigned long rs_wait; unsigned long rs_request_delay; unsigned long rs_running; unsigned long rs_locked; unsigned long rs_flushing; unsigned long rs_logging; __u32 rs_handle_count; __u32 rs_blocks; __u32 rs_blocks_logged; }; struct transaction_stats_s { unsigned long ts_tid; unsigned long ts_requested; struct transaction_run_stats_s run; }; static inline unsigned long jbd2_time_diff(unsigned long start, unsigned long end) { if (end >= start) return end - start; return end + (MAX_JIFFY_OFFSET - start); } #define JBD2_NR_BATCH 64 enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; #define JBD2_FC_REPLAY_STOP 0 #define JBD2_FC_REPLAY_CONTINUE 1 /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. */ struct journal_s { /** * @j_flags: General journaling state flags [j_state_lock, * no lock for quick racy checks] */ unsigned long j_flags; /** * @j_errno: * * Is there an outstanding uncleared error on the journal (from a prior * abort)? [j_state_lock] */ int j_errno; /** * @j_abort_mutex: Lock the whole aborting procedure. */ struct mutex j_abort_mutex; /** * @j_sb_buffer: The first part of the superblock buffer. */ struct buffer_head *j_sb_buffer; /** * @j_superblock: The second part of the superblock buffer. */ journal_superblock_t *j_superblock; /** * @j_state_lock: Protect the various scalars in the journal. */ rwlock_t j_state_lock; /** * @j_barrier_count: * * Number of processes waiting to create a barrier lock [j_state_lock, * no lock for quick racy checks] */ int j_barrier_count; /** * @j_barrier: The barrier lock itself. 
*/ struct mutex j_barrier; /** * @j_running_transaction: * * Transactions: The current running transaction... * [j_state_lock, no lock for quick racy checks] [caller holding * open handle] */ transaction_t *j_running_transaction; /** * @j_committing_transaction: * * the transaction we are pushing to disk * [j_state_lock] [caller holding open handle] */ transaction_t *j_committing_transaction; /** * @j_checkpoint_transactions: * * ... and a linked circular list of all transactions waiting for * checkpointing. [j_list_lock] */ transaction_t *j_checkpoint_transactions; /** * @j_wait_transaction_locked: * * Wait queue for waiting for a locked transaction to start committing, * or for a barrier lock to be released. */ wait_queue_head_t j_wait_transaction_locked; /** * @j_wait_done_commit: Wait queue for waiting for commit to complete. */ wait_queue_head_t j_wait_done_commit; /** * @j_wait_commit: Wait queue to trigger commit. */ wait_queue_head_t j_wait_commit; /** * @j_wait_updates: Wait queue to wait for updates to complete. */ wait_queue_head_t j_wait_updates; /** * @j_wait_reserved: * * Wait queue to wait for reserved buffer credits to drop. */ wait_queue_head_t j_wait_reserved; /** * @j_fc_wait: * * Wait queue to wait for completion of async fast commits. */ wait_queue_head_t j_fc_wait; /** * @j_checkpoint_mutex: * * Semaphore for locking against concurrent checkpoints. */ struct mutex j_checkpoint_mutex; /** * @j_chkpt_bhs: * * List of buffer heads used by the checkpoint routine. This * was moved from jbd2_log_do_checkpoint() to reduce stack * usage. Access to this array is controlled by the * @j_checkpoint_mutex. [j_checkpoint_mutex] */ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; /** * @j_shrinker: * * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: * * Number of journal buffers on the checkpoint list. [j_list_lock] */ struct percpu_counter j_checkpoint_jh_count; /** * @j_shrink_transaction: * * Record next transaction will shrink on the checkpoint list. * [j_list_lock] */ transaction_t *j_shrink_transaction; /** * @j_head: * * Journal head: identifies the first unused block in the journal. * [j_state_lock] */ unsigned long j_head; /** * @j_tail: * * Journal tail: identifies the oldest still-used block in the journal. * [j_state_lock] */ unsigned long j_tail; /** * @j_free: * * Journal free: how many free blocks are there in the journal? * [j_state_lock] */ unsigned long j_free; /** * @j_first: * * The block number of the first usable block in the journal * [j_state_lock]. */ unsigned long j_first; /** * @j_last: * * The block number one beyond the last usable block in the journal * [j_state_lock]. */ unsigned long j_last; /** * @j_fc_first: * * The block number of the first fast commit block in the journal * [j_state_lock]. */ unsigned long j_fc_first; /** * @j_fc_off: * * Number of fast commit blocks currently allocated. Accessed only * during fast commit. Currently only process can do fast commit, so * this field is not protected by any lock. */ unsigned long j_fc_off; /** * @j_fc_last: * * The block number one beyond the last fast commit block in the journal * [j_state_lock]. */ unsigned long j_fc_last; /** * @j_dev: Device where we store the journal. */ struct block_device *j_dev; /** * @j_blocksize: Block size for the location where we store the journal. */ int j_blocksize; /** * @j_blk_offset: * * Starting block offset into the device where we store the journal. 
*/ unsigned long long j_blk_offset; /** * @j_devname: Journal device name. */ char j_devname[BDEVNAME_SIZE+24]; /** * @j_fs_dev: * * Device which holds the client fs. For internal journal this will be * equal to j_dev. */ struct block_device *j_fs_dev; /** * @j_fs_dev_wb_err: * * Records the errseq of the client fs's backing block device. */ errseq_t j_fs_dev_wb_err; /** * @j_total_len: Total maximum capacity of the journal region on disk. */ unsigned int j_total_len; /** * @j_reserved_credits: * * Number of buffers reserved from the running transaction. */ atomic_t j_reserved_credits; /** * @j_list_lock: Protects the buffer lists and internal buffer state. */ spinlock_t j_list_lock; /** * @j_inode: * * Optional inode where we store the journal. If present, all * journal block numbers are mapped into this inode via bmap(). */ struct inode *j_inode; /** * @j_tail_sequence: * * Sequence number of the oldest transaction in the log [j_state_lock] */ tid_t j_tail_sequence; /** * @j_transaction_sequence: * * Sequence number of the next transaction to grant [j_state_lock] */ tid_t j_transaction_sequence; /** * @j_commit_sequence: * * Sequence number of the most recently committed transaction * [j_state_lock, no lock for quick racy checks] */ tid_t j_commit_sequence; /** * @j_commit_request: * * Sequence number of the most recent transaction wanting commit * [j_state_lock, no lock for quick racy checks] */ tid_t j_commit_request; /** * @j_uuid: * * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single * journal and to perform atomic updates across them. */ __u8 j_uuid[16]; /** * @j_task: Pointer to the current commit thread for this journal. */ struct task_struct *j_task; /** * @j_max_transaction_buffers: * * Maximum number of metadata buffers to allow in a single compound * commit transaction. */ int j_max_transaction_buffers; /** * @j_revoke_records_per_block: * * Number of revoke records that fit in one descriptor block. */ int j_revoke_records_per_block; /** * @j_commit_interval: * * What is the maximum transaction lifetime before we begin a commit? */ unsigned long j_commit_interval; /** * @j_commit_timer: The timer used to wakeup the commit thread. */ struct timer_list j_commit_timer; /** * @j_revoke_lock: Protect the revoke table. */ spinlock_t j_revoke_lock; /** * @j_revoke: * * The revoke table - maintains the list of revoked blocks in the * current transaction. */ struct jbd2_revoke_table_s *j_revoke; /** * @j_revoke_table: Alternate revoke tables for j_revoke. */ struct jbd2_revoke_table_s *j_revoke_table[2]; /** * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ struct buffer_head **j_wbuf; /** * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only * during a fast commit. Currently only process can do fast commit, so * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; /** * @j_wbufsize: * * Size of @j_wbuf array. */ int j_wbufsize; /** * @j_fc_wbufsize: * * Size of @j_fc_wbuf array. */ int j_fc_wbufsize; /** * @j_last_sync_writer: * * The pid of the last person to run a synchronous operation * through the journal. */ pid_t j_last_sync_writer; /** * @j_average_commit_time: * * The average amount of time in nanoseconds it takes to commit a * transaction to disk. 
[j_state_lock] */ u64 j_average_commit_time; /** * @j_min_batch_time: * * Minimum time that we should wait for additional filesystem operations * to get batched into a synchronous handle in microseconds. */ u32 j_min_batch_time; /** * @j_max_batch_time: * * Maximum time that we should wait for additional filesystem operations * to get batched into a synchronous handle in microseconds. */ u32 j_max_batch_time; /** * @j_commit_callback: * * This function is called when a transaction is closed. */ void (*j_commit_callback)(journal_t *, transaction_t *); /** * @j_submit_inode_data_buffers: * * This function is called for all inodes associated with the * committing transaction marked with JI_WRITE_DATA flag * before we start to write out the transaction to the journal. */ int (*j_submit_inode_data_buffers) (struct jbd2_inode *); /** * @j_finish_inode_data_buffers: * * This function is called for all inodes associated with the * committing transaction marked with JI_WAIT_DATA flag * after we have written the transaction to the journal * but before we write out the commit block. */ int (*j_finish_inode_data_buffers) (struct jbd2_inode *); /* * Journal statistics */ /** * @j_history_lock: Protect the transactions statistics history. */ spinlock_t j_history_lock; /** * @j_proc_entry: procfs entry for the jbd statistics directory. */ struct proc_dir_entry *j_proc_entry; /** * @j_stats: Overall statistics. */ struct transaction_stats_s j_stats; /** * @j_failed_commit: Failed journal commit ID. */ unsigned int j_failed_commit; /** * @j_private: * * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here. */ void *j_private; /** * @j_chksum_driver: * * Reference to checksum algorithm driver via cryptoapi. */ struct crypto_shash *j_chksum_driver; /** * @j_csum_seed: * * Precomputed journal UUID checksum for seeding other checksums. */ __u32 j_csum_seed; #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * @j_trans_commit_map: * * Lockdep entity to track transaction commit dependencies. Handles * hold this "lock" for read, when we wait for commit, we acquire the * "lock" for writing. This matches the properties of jbd2 journalling * where the running transaction has to wait for all handles to be * dropped to commit that transaction and also acquiring a handle may * require transaction commit to finish. */ struct lockdep_map j_trans_commit_map; #endif /** * @j_fc_cleanup_callback: * * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: * * File-system specific function that performs replay of a fast * commit. JBD2 calls this function for each fast commit block found in * the journal. This function should return JBD2_FC_REPLAY_CONTINUE * to indicate that the block was processed correctly and more fast * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP * indicates the end of replay (no more blocks remaining). A negative * return value indicates error. */ int (*j_fc_replay_callback)(struct journal_s *journal, struct buffer_head *bh, enum passtype pass, int off, tid_t expected_commit_id); /** * @j_bmap: * * Bmap function that should be used instead of the generic * VFS bmap function. 
*/ int (*j_bmap)(struct journal_s *journal, sector_t *block); }; #define jbd2_might_wait_for_commit(j) \ do { \ rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \ rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \ } while (0) /* * We can support any known requested features iff the * superblock is not in version 1. Otherwise we fail to support any * extended sb features. */ static inline bool jbd2_format_support_feature(journal_t *j) { return j->j_superblock->s_header.h_blocktype != cpu_to_be32(JBD2_SUPERBLOCK_V1); } /* journal feature predicate functions */ #define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ { \ return (jbd2_format_support_feature(j) && \ ((j)->j_superblock->s_feature_compat & \ cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \ } \ static inline void jbd2_set_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_compat |= \ cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ } \ static inline void jbd2_clear_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_compat &= \ ~cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ } #define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ { \ return (jbd2_format_support_feature(j) && \ ((j)->j_superblock->s_feature_ro_compat & \ cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \ } \ static inline void jbd2_set_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_ro_compat |= \ cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ } \ static inline void jbd2_clear_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_ro_compat &= \ ~cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ } #define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ { \ return (jbd2_format_support_feature(j) && \ ((j)->j_superblock->s_feature_incompat & \ cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \ } \ static inline void jbd2_set_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_incompat |= \ cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ } \ static inline void jbd2_clear_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_incompat &= \ ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ } JBD2_FEATURE_COMPAT_FUNCS(checksum, CHECKSUM) JBD2_FEATURE_INCOMPAT_FUNCS(revoke, REVOKE) JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) /* Journal high priority write IO operation flags */ #define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE) /* * Journal flag definitions */ #define JBD2_UNMOUNT 0x001 /* Journal thread is being destroyed */ #define JBD2_ABORT 0x002 /* Journaling has been aborted for errors. 
*/ #define JBD2_ACK_ERR 0x004 /* The errno in the sb has been acked */ #define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ #define JBD2_BARRIER 0x020 /* Use IDE barriers */ #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ #define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on * clean and empty filesystem * logging area */ #define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ #define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ #define JBD2_JOURNAL_FLUSH_DISCARD 0x0001 #define JBD2_JOURNAL_FLUSH_ZEROOUT 0x0002 #define JBD2_JOURNAL_FLUSH_VALID (JBD2_JOURNAL_FLUSH_DISCARD | \ JBD2_JOURNAL_FLUSH_ZEROOUT) /* * Function declarations for the journaling transaction and buffer * management */ /* Filing buffers */ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) { list_add_tail(&bh->b_assoc_buffers, head); } static inline void jbd2_unfile_log_bh(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); } /* Log buffer allocation */ struct buffer_head *jbd2_journal_get_descriptor_buffer(transaction_t *, int); void jbd2_descriptor_block_csum_set(journal_t *, struct buffer_head *); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); /* Commit management */ extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan); int __jbd2_journal_remove_checkpoint(struct journal_head *); int jbd2_journal_try_remove_checkpoint(struct journal_head *jh); void jbd2_journal_destroy_checkpoint(journal_t *journal); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); /* * Triggers */ struct jbd2_buffer_trigger_type { /* * Fired at the moment data to write to the journal are known to be * stable - so either at the moment b_frozen_data is created or just * before a buffer is written to the journal. mapped_data is a mapped * buffer that is the frozen data for commit. */ void (*t_frozen)(struct jbd2_buffer_trigger_type *type, struct buffer_head *bh, void *mapped_data, size_t size); /* * Fired during journal abort for dirty buffers that will not be * committed.
*/ void (*t_abort)(struct jbd2_buffer_trigger_type *type, struct buffer_head *bh); }; extern void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, struct jbd2_buffer_trigger_type *triggers); extern void jbd2_buffer_abort_trigger(struct journal_head *jh, struct jbd2_buffer_trigger_type *triggers); /* Buffer IO */ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct buffer_head **bh_out, sector_t blocknr); /* Transaction cache support */ extern void jbd2_journal_destroy_transaction_cache(void); extern int __init jbd2_journal_init_transaction_cache(void); extern void jbd2_journal_free_transaction(transaction_t *); /* * Journal locking. * * We need to lock the journal during transaction state changes so that nobody * ever tries to take a handle on the running transaction while we are in the * middle of moving it to the commit phase. j_state_lock does this. * * Note that the locking is completely interrupt unsafe. We never touch * journal structures from interrupts. */ static inline handle_t *journal_current_handle(void) { return current->journal_info; } /* The journaling code user interface: * * Create and destroy handles * Register buffer modifications against the current transaction. */ extern handle_t *jbd2_journal_start(journal_t *, int nblocks); extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks, int revoke_records, gfp_t gfp_mask, unsigned int type, unsigned int line_no); extern int jbd2_journal_restart(handle_t *, int nblocks); extern int jbd2__journal_restart(handle_t *, int nblocks, int revoke_records, gfp_t gfp_mask); extern int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, unsigned int line_no); extern void jbd2_journal_free_reserved(handle_t *handle); extern int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); void jbd2_journal_set_triggers(struct buffer_head *, struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); int jbd2_journal_invalidate_folio(journal_t *, struct folio *, size_t offset, size_t length); bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio); extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_flush(journal_t *journal, unsigned int flags); extern void jbd2_journal_lock_updates (journal_t *); extern void jbd2_journal_unlock_updates (journal_t *); void jbd2_journal_wait_updates(journal_t *); extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, unsigned long long start, int len, int bsize); extern journal_t * jbd2_journal_init_inode (struct inode *); extern int jbd2_journal_update_format (journal_t *); extern int jbd2_journal_check_used_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int jbd2_journal_check_available_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_destroy 
(journal_t *); extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, unsigned long, blk_opf_t); extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit_nested(journal_t *); extern int jbd2_journal_inode_ranged_write(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); extern int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); extern int jbd2_journal_finish_inode_data_buffers( struct jbd2_inode *jinode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); /* * journal_head management */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); void jbd2_journal_put_journal_head(struct journal_head *jh); /* * handle management */ extern struct kmem_cache *jbd2_handle_cache; static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags) { return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags); } static inline void jbd2_free_handle(handle_t *handle) { kmem_cache_free(jbd2_handle_cache, handle); } /* * jbd2_inode management (optional, for those file systems that want to use * dynamically allocated jbd2_inode structures) */ extern struct kmem_cache *jbd2_inode_cache; static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags) { return kmem_cache_alloc(jbd2_inode_cache, gfp_flags); } static inline void jbd2_free_inode(struct jbd2_inode *jinode) { kmem_cache_free(jbd2_inode_cache, jinode); } /* Primary revoke support */ #define JOURNAL_REVOKE_DEFAULT_HASH 256 extern int jbd2_journal_init_revoke(journal_t *, int); extern void jbd2_journal_destroy_revoke_record_cache(void); extern void jbd2_journal_destroy_revoke_table_cache(void); extern int __init jbd2_journal_init_revoke_record_cache(void); extern int __init jbd2_journal_init_revoke_table_cache(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); extern void jbd2_journal_write_revoke_records(transaction_t *transaction, struct list_head *log_bufs); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); extern void jbd2_journal_clear_revoke(journal_t *); extern void jbd2_journal_switch_revoke_table(journal_t *journal); extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); /* * The log thread user interface: * * Request space in the current transaction, and force transaction commit * transitions on demand. 
*/ int jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_transaction_committed(journal_t *journal, tid_t tid); int jbd2_complete_transaction(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) { return (journal->j_total_len - journal->j_fc_wbufsize) / 4; } /* * is_journal_aborted * * Simple test wrapper function to test the JBD2_ABORT state flag. This * bit, when set, indicates that we have had a fatal error somewhere, * either inside the journaling layer or indicated to us by the client * (e.g. ext3), and that we should not commit any further * transactions. */ static inline int is_journal_aborted(journal_t *journal) { return journal->j_flags & JBD2_ABORT; } static inline int is_handle_aborted(handle_t *handle) { if (handle->h_aborted || !handle->h_transaction) return 1; return is_journal_aborted(handle->h_transaction->t_journal); } static inline void jbd2_journal_abort_handle(handle_t *handle) { handle->h_aborted = 1; } static inline void jbd2_init_fs_dev_write_error(journal_t *journal) { struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; /* * Save the original wb_err value of client fs's bdev mapping which * could be used to detect the client fs's metadata async write error. */ errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err); } static inline int jbd2_check_fs_dev_write_error(journal_t *journal) { struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; return errseq_check(&mapping->wb_err, READ_ONCE(journal->j_fs_dev_wb_err)); } #endif /* __KERNEL__ */ /* Comparison functions for transaction IDs: perform comparisons using * modulo arithmetic so that they work over sequence number wraps. */ static inline int tid_gt(tid_t x, tid_t y) { int difference = (x - y); return (difference > 0); } static inline int tid_geq(tid_t x, tid_t y) { int difference = (x - y); return (difference >= 0); } extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) { return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); } static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && journal->j_chksum_driver == NULL); return journal->j_chksum_driver != NULL; } static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) { int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks); return num_fc_blocks ?
num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS; } /* * Return number of free blocks in the log. Must be called under j_state_lock. */ static inline unsigned long jbd2_log_space_left(journal_t *journal) { /* Allow for rounding errors */ long free = journal->j_free - 32; if (journal->j_committing_transaction) { free -= atomic_read(&journal-> j_committing_transaction->t_outstanding_credits); } return max_t(long, free, 0); } /* * Definitions which augment the buffer_head layer */ /* journaling buffer types */ #define BJ_None 0 /* Not journaled */ #define BJ_Metadata 1 /* Normal journaled metadata */ #define BJ_Forget 2 /* Buffer superseded by this transaction */ #define BJ_Shadow 3 /* Buffer contents being shadowed to the log */ #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 /* JBD uses a CRC32 checksum */ #define JBD_MAX_CHECKSUM_SIZE 4 static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { struct { struct shash_desc shash; char ctx[JBD_MAX_CHECKSUM_SIZE]; } desc; int err; BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > JBD_MAX_CHECKSUM_SIZE); desc.shash.tfm = journal->j_chksum_driver; *(u32 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, address, length); BUG_ON(err); return *(u32 *)desc.ctx; } /* Return most recent uncommitted transaction */ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) { tid_t tid; read_lock(&journal->j_state_lock); tid = journal->j_commit_request; if (journal->j_running_transaction) tid = journal->j_running_transaction->t_tid; read_unlock(&journal->j_state_lock); return tid; } static inline int jbd2_handle_buffer_credits(handle_t *handle) { journal_t *journal; if (!handle->h_reserved) journal = handle->h_transaction->t_journal; else journal = handle->h_journal; return handle->h_total_credits - DIV_ROUND_UP(handle->h_revoke_credits_requested, journal->j_revoke_records_per_block); } #ifdef __KERNEL__ #define buffer_trace_init(bh) do {} while (0) #define print_buffer_fields(bh) do {} while (0) #define print_buffer_trace(bh) do {} while (0) #define BUFFER_TRACE(bh, info) do {} while (0) #define BUFFER_TRACE2(bh, bh2, info) do {} while (0) #define JBUFFER_TRACE(jh, info) do {} while (0) #endif /* __KERNEL__ */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #endif /* _LINUX_JBD2_H */
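/*
 * Illustrative sketch (not part of the header above): how a client
 * filesystem typically drives the handle API declared in jbd2.h -- start a
 * handle, get write access to a metadata buffer, mark it dirty, then stop
 * the handle. The function name and the single-credit reservation are
 * assumptions for illustration only; real callers such as ext4 size their
 * credit requests per operation.
 */
static int example_update_one_metadata_buffer(journal_t *journal,
					      struct buffer_head *bh)
{
	handle_t *handle;
	int err, ret;

	/* Reserve one buffer credit in the running transaction. */
	handle = jbd2_journal_start(journal, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* Declare intent to modify bh; jbd2 may snapshot undo/frozen data. */
	err = jbd2_journal_get_write_access(handle, bh);
	if (err)
		goto out;

	/* ... modify bh->b_data here ... */

	/* File bh as journaled metadata on the running transaction. */
	err = jbd2_journal_dirty_metadata(handle, bh);
out:
	/* Drop the handle; the commit thread writes the block out later. */
	ret = jbd2_journal_stop(handle);
	return err ? err : ret;
}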
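/*
 * Illustrative note (not part of the header above): each JBD2_FEATURE_*_FUNCS
 * invocation stamps out a has/set/clear helper triple. For instance,
 * JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) generates
 * jbd2_has_feature_fast_commit(), jbd2_set_feature_fast_commit() and
 * jbd2_clear_feature_fast_commit(), all operating on the big-endian
 * s_feature_incompat word of the journal superblock. A caller usually only
 * needs the generated predicate; the wrapper name below is hypothetical.
 */
static inline bool example_journal_uses_fast_commit(journal_t *journal)
{
	/* False for v1 superblocks or when the INCOMPAT bit is clear. */
	return jbd2_has_feature_fast_commit(journal);
}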
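/*
 * Illustrative sketch: tid_gt()/tid_geq() above compute the difference in
 * unsigned tid_t space and then test its sign as a signed int, so the
 * comparison stays correct across sequence-number wrap. For example,
 * tid_gt(5, 0xfffffff0) sees (tid_t)(5 - 0xfffffff0) == 21, which is
 * positive, so tid 5 is correctly treated as newer. A typical use is
 * waiting until a given transaction has committed; the wrapper below is a
 * hypothetical caller of the real jbd2_log_wait_commit().
 */
static int example_wait_for_tid(journal_t *journal, tid_t tid)
{
	/* Quick racy check, as permitted for j_commit_sequence. */
	if (tid_geq(journal->j_commit_sequence, tid))
		return 0;
	return jbd2_log_wait_commit(journal, tid);
}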
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __BEN_VLAN_802_1Q_INC__ #define __BEN_VLAN_802_1Q_INC__ #include <linux/if_vlan.h> #include <linux/u64_stats_sync.h> #include <linux/list.h> /* if this changes, algorithm will have to be reworked because this * depends on completely exhausting the VLAN identifier space. Thus * it gives constant time look-up, but in many cases it wastes memory. */ #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 #define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) enum vlan_protos { VLAN_PROTO_8021Q = 0, VLAN_PROTO_8021AD, VLAN_PROTO_NUM, }; struct vlan_group { unsigned int nr_vlan_devs; struct hlist_node hlist; /* linked list */ struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM] [VLAN_GROUP_ARRAY_SPLIT_PARTS]; }; struct vlan_info { struct net_device *real_dev; /* The ethernet(like) device * the vlan is attached to. */ struct vlan_group grp; struct list_head vid_list; unsigned int nr_vids; struct rcu_head rcu; }; static inline int vlan_proto_idx(__be16 proto) { switch (proto) { case htons(ETH_P_8021Q): return VLAN_PROTO_8021Q; case htons(ETH_P_8021AD): return VLAN_PROTO_8021AD; default: WARN(1, "invalid VLAN protocol: 0x%04x\n", ntohs(proto)); return -EINVAL; } } static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg, unsigned int pidx, u16 vlan_id) { struct net_device **array; array = vg->vlan_devices_arrays[pidx] [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; /* paired with smp_wmb() in vlan_group_prealloc_vid() */ smp_rmb(); return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; } static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, __be16 vlan_proto, u16 vlan_id) { int pidx = vlan_proto_idx(vlan_proto); if (pidx < 0) return NULL; return __vlan_group_get_device(vg, pidx, vlan_id); } static inline void vlan_group_set_device(struct vlan_group *vg, __be16 vlan_proto, u16 vlan_id, struct net_device *dev) { int pidx = vlan_proto_idx(vlan_proto); struct net_device **array; if (!vg || pidx < 0) return; array = vg->vlan_devices_arrays[pidx] [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } /* Must be invoked with rcu_read_lock or with RTNL.
*/ static inline struct net_device *vlan_find_dev(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); if (vlan_info) return vlan_group_get_device(&vlan_info->grp, vlan_proto, vlan_id); return NULL; } static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev) { netdev_features_t ret; ret = real_dev->hw_enc_features & (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL); if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK)) return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM; return 0; } #define vlan_group_for_each_dev(grp, i, dev) \ for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \ if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \ (i) % VLAN_N_VID))) int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto); void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto); /* found in vlan_dev.c */ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); void vlan_dev_free_egress_priority(const struct net_device *dev); int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result, size_t size); int vlan_check_real_dev(struct net_device *real_dev, __be16 protocol, u16 vlan_id, struct netlink_ext_ack *extack); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) { struct vlan_dev_priv *vip = vlan_dev_priv(dev); return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7]; } #ifdef CONFIG_VLAN_8021Q_GVRP int vlan_gvrp_request_join(const struct net_device *dev); void vlan_gvrp_request_leave(const struct net_device *dev); int vlan_gvrp_init_applicant(struct net_device *dev); void vlan_gvrp_uninit_applicant(struct net_device *dev); int vlan_gvrp_init(void); void vlan_gvrp_uninit(void); #else static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; } static inline void vlan_gvrp_request_leave(const struct net_device *dev) {} static inline int vlan_gvrp_init_applicant(struct net_device *dev) { return 0; } static inline void vlan_gvrp_uninit_applicant(struct net_device *dev) {} static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif #ifdef CONFIG_VLAN_8021Q_MVRP int vlan_mvrp_request_join(const struct net_device *dev); void vlan_mvrp_request_leave(const struct net_device *dev); int vlan_mvrp_init_applicant(struct net_device *dev); void vlan_mvrp_uninit_applicant(struct net_device *dev); int vlan_mvrp_init(void); void vlan_mvrp_uninit(void); #else static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} static inline int vlan_mvrp_init(void) { return 0; } static inline void vlan_mvrp_uninit(void) {} #endif extern const char vlan_fullname[]; extern const char vlan_version[]; int 
vlan_netlink_init(void); void vlan_netlink_fini(void); extern struct rtnl_link_ops vlan_link_ops; extern unsigned int vlan_net_id; struct proc_dir_entry; struct vlan_net { /* /proc/net/vlan */ struct proc_dir_entry *proc_vlan_dir; /* /proc/net/vlan/config */ struct proc_dir_entry *proc_vlan_conf; /* Determines interface naming scheme. */ unsigned short name_type; }; #endif /* !(__BEN_VLAN_802_1Q_INC__) */
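/*
 * Illustrative sketch (not part of the header above): resolving the VLAN
 * net_device for a tagged frame with the helpers declared above. The
 * function name is hypothetical; real callers live in vlan_core.c. Note the
 * two-level lookup: vlan_proto_idx() picks the per-protocol table, then the
 * VLAN ID is split into a part index (vlan_id / VLAN_GROUP_ARRAY_PART_LEN)
 * and a slot within that part (vlan_id % VLAN_GROUP_ARRAY_PART_LEN).
 */
static struct net_device *example_resolve_vlan_dev(struct net_device *real_dev,
						   __be16 proto, u16 vid)
{
	struct net_device *vlan_dev;

	rcu_read_lock();
	/* NULL if no VLAN with this protocol/VID is configured on real_dev. */
	vlan_dev = vlan_find_dev(real_dev, proto, vid);
	if (vlan_dev)
		dev_hold(vlan_dev);	/* keep a reference past the RCU section */
	rcu_read_unlock();

	return vlan_dev;
}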
// SPDX-License-Identifier: GPL-2.0-only /* * mac80211 configuration hooks for cfg80211 * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/nl80211.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <linux/rcupdate.h> #include <linux/fips.h> #include <linux/if_ether.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "mesh.h" #include "wme.h" static struct ieee80211_link_data * ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id, bool require_valid) { struct ieee80211_link_data *link; if (link_id < 0) { /* * For keys, if sdata is not an MLD, we might not use * the return value at all (if it's not a pairwise key), * so in that case (require_valid==false) don't error. */ if (require_valid && ieee80211_vif_is_mld(&sdata->vif)) return ERR_PTR(-EINVAL); return &sdata->deflink; } link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return ERR_PTR(-ENOLINK); return link; } static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { bool mu_mimo_groups = false; bool mu_mimo_follow = false; if (params->vht_mumimo_groups) { u64 membership; BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.position, params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MU_GROUPS); /* don't care about endianness - just check for 0 */ memcpy(&membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); mu_mimo_groups = membership != 0; } if (params->vht_mumimo_follow_addr) { mu_mimo_follow = is_valid_ether_addr(params->vht_mumimo_follow_addr); ether_addr_copy(sdata->u.mntr.mu_follow_addr, params->vht_mumimo_follow_addr); } sdata->vif.bss_conf.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; } static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *monitor_sdata; /* check flags first */ if (params->flags && ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE; /* * Prohibit MONITOR_FLAG_COOK_FRAMES and * MONITOR_FLAG_ACTIVE to be changed while the * interface is up.
* Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ if ((params->flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; } /* also validate MU-MIMO change */ monitor_sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!monitor_sdata && (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) return -EOPNOTSUPP; /* apply all changes now - no failures allowed */ if (monitor_sdata) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { if (ieee80211_sdata_running(sdata)) { ieee80211_adjust_monitor_flags(sdata, -1); sdata->u.mntr.flags = params->flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); } else { /* * Because the interface is down, ieee80211_do_stop * and ieee80211_do_open take care of "everything" * mentioned in the comment above. */ sdata->u.mntr.flags = params->flags; } } return 0; } static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, struct cfg80211_mbssid_config params, struct ieee80211_bss_conf *link_conf) { struct ieee80211_sub_if_data *tx_sdata; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) return -EINVAL; tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev); if (!tx_sdata) return -EINVAL; if (tx_sdata == sdata) { sdata->vif.mbssid_tx_vif = &sdata->vif; } else { sdata->vif.mbssid_tx_vif = &tx_sdata->vif; link_conf->nontransmitted = true; link_conf->bssid_index = params.index; } if (params.ema) link_conf->ema_ap = true; return 0; } static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct wireless_dev *wdev; struct ieee80211_sub_if_data *sdata; int err; err = ieee80211_if_add(local, name, name_assign_type, &wdev, type, params); if (err) return ERR_PTR(err); sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (type == NL80211_IFTYPE_MONITOR) { err = ieee80211_set_mon_options(sdata, params); if (err) { ieee80211_if_remove(sdata); return NULL; } } return wdev; } static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_if_remove(IEEE80211_WDEV_TO_SUB_IF(wdev)); return 0; } static int ieee80211_change_iface(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; if (type == NL80211_IFTYPE_AP_VLAN && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (params->use_4addr == ifmgd->use_4addr) return 0; /* FIXME: no support for 4-addr MLO yet */ if (ieee80211_vif_is_mld(&sdata->vif)) return -EOPNOTSUPP; sdata->u.mgd.use_4addr = params->use_4addr; if (!ifmgd->associated) return 0; sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); if (sta) drv_sta_set_4addr(local, sdata, &sta->sta, params->use_4addr); if (params->use_4addr) ieee80211_send_4addr_nullfunc(local, 
sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { ret = ieee80211_set_mon_options(sdata, params); if (ret) return ret; } return 0; } static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0); if (ret < 0) return ret; return ieee80211_do_open(wdev, true); } static void ieee80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } static int ieee80211_start_nan(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0); if (ret < 0) return ret; ret = ieee80211_do_open(wdev, true); if (ret) return ret; ret = drv_start_nan(sdata->local, sdata, conf); if (ret) ieee80211_sdata_stop(sdata); sdata->u.nan.conf = *conf; return ret; } static void ieee80211_stop_nan(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); drv_stop_nan(sdata->local, sdata); ieee80211_sdata_stop(sdata); } static int ieee80211_nan_change_conf(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_conf new_conf; int ret = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; new_conf = sdata->u.nan.conf; if (changes & CFG80211_NAN_CONF_CHANGED_PREF) new_conf.master_pref = conf->master_pref; if (changes & CFG80211_NAN_CONF_CHANGED_BANDS) new_conf.bands = conf->bands; ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); if (!ret) sdata->u.nan.conf = new_conf; return ret; } static int ieee80211_add_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_func *nan_func) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; spin_lock_bh(&sdata->u.nan.func_lock); ret = idr_alloc(&sdata->u.nan.function_inst_ids, nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, GFP_ATOMIC); spin_unlock_bh(&sdata->u.nan.func_lock); if (ret < 0) return ret; nan_func->instance_id = ret; WARN_ON(nan_func->instance_id == 0); ret = drv_add_nan_func(sdata->local, sdata, nan_func); if (ret) { spin_lock_bh(&sdata->u.nan.func_lock); idr_remove(&sdata->u.nan.function_inst_ids, nan_func->instance_id); spin_unlock_bh(&sdata->u.nan.func_lock); } return ret; } static struct cfg80211_nan_func * ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, u64 cookie) { struct cfg80211_nan_func *func; int id; lockdep_assert_held(&sdata->u.nan.func_lock); idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { if (func->cookie == cookie) return func; } return NULL; } static void ieee80211_del_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_func *func; u8 instance_id = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN || !ieee80211_sdata_running(sdata)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = 
ieee80211_find_nan_func_by_cookie(sdata, cookie); if (func) instance_id = func->instance_id; spin_unlock_bh(&sdata->u.nan.func_lock); if (instance_id) drv_del_nan_func(sdata->local, sdata, instance_id); } static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; ieee80211_check_fast_xmit_iface(sdata); return 0; } static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, const u8 *mac_addr, u8 key_idx) { struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; struct sta_info *sta; int ret = -EINVAL; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) return -EINVAL; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return -EINVAL; if (sta->ptk_idx == key_idx) return 0; key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) ret = ieee80211_set_tx_key(key); return ret; } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; struct ieee80211_key *key; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; if (IS_ERR(link)) return PTR_ERR(link); if (pairwise && params->mode == NL80211_KEY_SET_TX) return ieee80211_set_tx(sdata, mac_addr, key_idx); /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: if (link_id >= 0) return -EINVAL; if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; break; default: break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); key->conf.link_id = link_id; if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; if (params->mode == NL80211_KEY_NO_TX) key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); /* * The ASSOC test makes sure the driver is ready to * receive the key. When wpa_supplicant has roamed * using FT, it attempts to set the key before * association has completed, this rejects that attempt * so it will set the key again after association. * * TODO: accept the key if we have a station entry and * add it to the device after the station. 
*/ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free_unused(key); return -ENOENT; } } switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: /* no MFP (yet) */ break; case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; #endif case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_OCB: /* shouldn't happen */ WARN_ON_ONCE(1); break; } err = ieee80211_key_link(key, link, sta); /* KRACK protection, shouldn't happen but just silently accept key */ if (err == -EALREADY) err = 0; return err; } static struct ieee80211_key * ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_local *local __maybe_unused = sdata->local; struct ieee80211_link_data *link = &sdata->deflink; struct ieee80211_key *key; if (link_id >= 0) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return NULL; } if (mac_addr) { struct sta_info *sta; struct link_sta_info *link_sta; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return NULL; if (link_id >= 0) { link_sta = rcu_dereference_check(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) return NULL; } else { link_sta = &sta->deflink; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) return wiphy_dereference(local->hw.wiphy, link_sta->gtk[key_idx]); return NULL; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]); if (key) return key; /* or maybe it was a WEP key */ if (key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); return NULL; } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; lockdep_assert_wiphy(local->hw.wiphy); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) return -ENOENT; ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); return 0; } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { struct ieee80211_sub_if_data *sdata; u8 seq[6] = {0}; struct key_params params; struct ieee80211_key *key; u64 pn64; u32 iv32; u16 iv16; int err = -ENOENT; struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) goto out; memset(&params, 0, 
sizeof(params)); params.cipher = key->conf.cipher; switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: pn64 = atomic64_read(&key->conf.tx_pn); iv32 = TKIP_PN_TO_IV32(pn64); iv16 = TKIP_PN_TO_IV16(pn64); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); iv32 = kseq.tkip.iv32; iv16 = kseq.tkip.iv16; } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; seq[2] = iv32 & 0xff; seq[3] = (iv32 >> 8) & 0xff; seq[4] = (iv32 >> 16) & 0xff; seq[5] = (iv32 >> 24) & 0xff; params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_cmac)); fallthrough; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_gmac)); fallthrough; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), gcmp)); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); memcpy(seq, kseq.ccmp.pn, 6); } else { pn64 = atomic64_read(&key->conf.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; seq[2] = pn64 >> 16; seq[3] = pn64 >> 24; seq[4] = pn64 >> 32; seq[5] = pn64 >> 40; } params.seq = seq; params.seq_len = 6; break; default: if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) break; if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) break; drv_get_key_seq(sdata->local, key, &kseq); params.seq = kseq.hw.seq; params.seq_len = kseq.hw.seq_len; break; } params.key = key->conf.key; params.key_len = key->conf.keylen; callback(cookie, &params); err = 0; out: rcu_read_unlock(); return err; } static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool uni, bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_key(link, key_idx, uni, multi); return 0; } static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_mgmt_key(link, key_idx); return 0; } static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_beacon_key(link, key_idx); return 0; } void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo) { rinfo->flags = 0; if (rate->flags & IEEE80211_TX_RC_MCS) { rinfo->flags |= RATE_INFO_FLAGS_MCS; rinfo->mcs = rate->idx; } else if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = ieee80211_rate_get_vht_mcs(rate); rinfo->nss = ieee80211_rate_get_vht_nss(rate); } else { struct ieee80211_supported_band *sband; sband = 
ieee80211_get_sband(sta->sdata); WARN_ON_ONCE(sband && !sband->bitrates); if (sband && sband->bitrates) rinfo->legacy = sband->bitrates[rate->idx].bitrate; } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_160; else rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_by_idx(sdata, idx); if (sta) { ret = 0; memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, int idx, struct survey_info *survey) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); return drv_get_survey(local, idx, survey); } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (sta) { ret = 0; sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_set_monitor_channel(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; lockdep_assert_wiphy(local->hw.wiphy); if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, &chanreq.oper)) return 0; sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) goto done; if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_EXCLUSIVE); if (ret) return ret; done: local->monitor_chanreq = chanreq; return 0; } static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, struct ieee80211_link_data *link) { struct probe_resp *new, *old; if (!resp || !resp_len) return 1; old = sdata_dereference(link->u.ap.probe_resp, sdata); new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = resp_len; memcpy(new->data, resp, resp_len); if (csa) memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp, csa->n_counter_offsets_presp * sizeof(new->cntdwn_counter_offsets[0])); else if (cca) new->cntdwn_counter_offsets[0] = cca->counter_offset_presp; rcu_assign_pointer(link->u.ap.probe_resp, new); if (old) kfree_rcu(old, rcu_head); return 0; } static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct cfg80211_fils_discovery *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct fils_discovery_data *new, *old = NULL; struct ieee80211_fils_discovery *fd; if (!params->update) return 0; fd = 
&link_conf->fils_discovery; fd->min_interval = params->min_interval; fd->max_interval = params->max_interval; old = sdata_dereference(link->u.ap.fils_discovery, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.fils_discovery, new); } else { RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); } *changed |= BSS_CHANGED_FILS_DISCOVERY; return 0; } static int ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_unsol_bcast_probe_resp *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct unsol_bcast_probe_resp_data *new, *old = NULL; if (!params->update) return 0; link_conf->unsol_bcast_probe_resp_interval = params->interval; old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); } else { RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); } *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; return 0; } static int ieee80211_set_ftm_responder_params( struct ieee80211_sub_if_data *sdata, const u8 *lci, size_t lci_len, const u8 *civicloc, size_t civicloc_len, struct ieee80211_bss_conf *link_conf) { struct ieee80211_ftm_responder_params *new, *old; u8 *pos; int len; if (!lci_len && !civicloc_len) return 0; old = link_conf->ftmr_params; len = lci_len + civicloc_len; new = kzalloc(sizeof(*new) + len, GFP_KERNEL); if (!new) return -ENOMEM; pos = (u8 *)(new + 1); if (lci_len) { new->lci_len = lci_len; new->lci = pos; memcpy(pos, lci, lci_len); pos += lci_len; } if (civicloc_len) { new->civicloc_len = civicloc_len; new->civicloc = pos; memcpy(pos, civicloc, civicloc_len); pos += civicloc_len; } link_conf->ftmr_params = new; kfree(old); return 0; } static int ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst, struct cfg80211_mbssid_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, struct cfg80211_rnr_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_beacon_data *params, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, u64 *changed) { struct cfg80211_mbssid_elems *mbssid = NULL; struct cfg80211_rnr_elems *rnr = NULL; struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; u64 _changed = BSS_CHANGED_BEACON; struct ieee80211_bss_conf *link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); /* Need to have a beacon head if we don't have one yet */ if (!params->head 
&& !old) return -EINVAL; /* new or old head? */ if (params->head) new_head_len = params->head_len; else new_head_len = old->head_len; /* new or old tail? */ if (params->tail || !old) /* params->tail_len will be zero for !params->tail */ new_tail_len = params->tail_len; else new_tail_len = old->tail_len; size = sizeof(*new) + new_head_len + new_tail_len; /* new or old multiple BSSID elements? */ if (params->mbssid_ies) { mbssid = params->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (params->rnr_ies) { rnr = params->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } else if (old && old->mbssid_ies) { mbssid = old->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (old && old->rnr_ies) { rnr = old->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } new = kzalloc(size, GFP_KERNEL); if (!new) return -ENOMEM; /* start filling the new info now */ /* * pointers go into the block we allocated, * memory is | beacon_data | head | tail | mbssid_ies | rnr_ies */ new->head = ((u8 *) new) + sizeof(*new); new->tail = new->head + new_head_len; new->head_len = new_head_len; new->tail_len = new_tail_len; /* copy in optional mbssid_ies */ if (mbssid) { u8 *pos = new->tail + new->tail_len; new->mbssid_ies = (void *)pos; pos += struct_size(new->mbssid_ies, elem, mbssid->cnt); pos += ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, mbssid); if (rnr) { new->rnr_ies = (void *)pos; pos += struct_size(new->rnr_ies, elem, rnr->cnt); ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr); } /* update bssid_indicator */ link_conf->bssid_indicator = ilog2(__roundup_pow_of_two(mbssid->cnt + 1)); } if (csa) { new->cntdwn_current_counter = csa->count; memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon, csa->n_counter_offsets_beacon * sizeof(new->cntdwn_counter_offsets[0])); } else if (cca) { new->cntdwn_current_counter = cca->count; new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon; } /* copy in head */ if (params->head) memcpy(new->head, params->head, new_head_len); else memcpy(new->head, old->head, new_head_len); /* copy in optional tail */ if (params->tail) memcpy(new->tail, params->tail, new_tail_len); else if (old) memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, params->probe_resp_len, csa, cca, link); if (err < 0) { kfree(new); return err; } if (err == 0) _changed |= BSS_CHANGED_AP_PROBE_RESP; if (params->ftm_responder != -1) { link_conf->ftm_responder = params->ftm_responder; err = ieee80211_set_ftm_responder_params(sdata, params->lci, params->lci_len, params->civicloc, params->civicloc_len, link_conf); if (err < 0) { kfree(new); return err; } _changed |= BSS_CHANGED_FTM_RESPONDER; } rcu_assign_pointer(link->u.ap.beacon, new); sdata->u.ap.active = true; if (old) kfree_rcu(old, rcu_head); *changed |= _changed; return 0; } static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata) { struct ieee80211_link_data *link; u8 link_id, num = 0; if (sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_P2P_GO) return num; if (!sdata->vif.valid_links) return num; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; if (sdata_dereference(link->u.ap.beacon, sdata)) num++; } return num; } static int ieee80211_start_ap(struct 
wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; u64 changed = BSS_CHANGED_BEACON_INT | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | BSS_CHANGED_TWT; int i, err; int prev_beacon_int; unsigned int link_id = params->beacon.link_id; struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; struct ieee80211_chan_req chanreq = { .oper = params->chandef }; lockdep_assert_wiphy(local->hw.wiphy); link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); if (old) return -EALREADY; if (params->smps_mode != NL80211_SMPS_OFF) return -EOPNOTSUPP; link->smps_mode = IEEE80211_SMPS_OFF; link->needed_rx_chains = sdata->local->rx_chains; prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; if (params->ht_cap) link_conf->ht_ldpc = params->ht_cap->cap_info & cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); if (params->vht_cap) { link_conf->vht_ldpc = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); link_conf->vht_su_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); link_conf->vht_mu_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE); link_conf->vht_mu_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); } if (params->he_cap && params->he_oper) { link_conf->he_support = true; link_conf->htc_trig_based_pkt_ext = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK); link_conf->frame_time_rts_th = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); changed |= BSS_CHANGED_HE_OBSS_PD; if (params->beacon.he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; } if (params->he_cap) { link_conf->he_ldpc = params->he_cap->phy_cap_info[1] & IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; link_conf->he_su_beamformee = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE; link_conf->he_mu_beamformer = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER; link_conf->he_full_ul_mumimo = params->he_cap->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO; } if (params->eht_cap) { if (!link_conf->he_support) return -EOPNOTSUPP; link_conf->eht_support = true; link_conf->eht_su_beamformer = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; link_conf->eht_su_beamformee = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; link_conf->eht_mu_beamformer = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); } else { link_conf->eht_su_beamformer = false; link_conf->eht_su_beamformee = false; link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && params->mbssid_config.tx_wdev) { err = ieee80211_set_ap_mbssid_options(sdata, params->mbssid_config, 
link_conf); if (err) return err; } err = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); if (err) { link_conf->beacon_int = prev_beacon_int; return err; } /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; sdata->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; sdata->control_port_no_preauth = params->crypto.control_port_no_preauth; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; vlan->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; vlan->control_port_no_preauth = params->crypto.control_port_no_preauth; } link_conf->dtim_period = params->dtim_period; link_conf->enable_beacon = true; link_conf->allow_p2p_go_ps = sdata->vif.p2p; link_conf->twt_responder = params->twt_responder; link_conf->he_obss_pd = params->he_obss_pd; link_conf->he_bss_color = params->beacon.he_bss_color; sdata->vif.cfg.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ; sdata->vif.cfg.ssid_len = params->ssid_len; if (params->ssid_len) memcpy(sdata->vif.cfg.ssid, params->ssid, params->ssid_len); link_conf->hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); memset(&link_conf->p2p_noa_attr, 0, sizeof(link_conf->p2p_noa_attr)); link_conf->p2p_noa_attr.oppps_ctwindow = params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; if (params->p2p_opp_ps) link_conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = params->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) link_conf->beacon_tx_rate = params->beacon_rate; err = ieee80211_assign_beacon(sdata, link, &params->beacon, NULL, NULL, &changed); if (err < 0) goto error; err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, link, link_conf, &changed); if (err < 0) goto error; err = ieee80211_set_unsol_bcast_probe_resp(sdata, &params->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) goto error; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { old = sdata_dereference(link->u.ap.beacon, sdata); if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(link->u.ap.beacon, NULL); sdata->u.ap.active = false; goto error; } ieee80211_recalc_dtim(local, sdata); ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID); ieee80211_link_info_change_notify(sdata, link, changed); if (ieee80211_num_beaconing_links(sdata) <= 1) netif_carrier_on(dev); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_on(vlan->dev); return 0; error: ieee80211_link_release_channel(link); return err; } static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_update *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct cfg80211_beacon_data *beacon = &params->beacon; struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; u64 changed = 0; 
lockdep_assert_wiphy(wiphy); link = sdata_dereference(sdata->link[beacon->link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ if (link_conf->csa_active || link_conf->color_change_active) return -EBUSY; old = sdata_dereference(link->u.ap.beacon, sdata); if (!old) return -ENOENT; err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL, &changed); if (err < 0) return err; err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, link, link_conf, &changed); if (err < 0) return err; err = ieee80211_set_unsol_bcast_probe_resp(sdata, &params->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) return err; if (beacon->he_bss_color_valid && beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; changed |= BSS_CHANGED_HE_BSS_COLOR; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static void ieee80211_free_next_beacon(struct ieee80211_link_data *link) { if (!link->u.ap.next_beacon) return; kfree(link->u.ap.next_beacon->mbssid_ies); kfree(link->u.ap.next_beacon->rnr_ies); kfree(link->u.ap.next_beacon); link->u.ap.next_beacon = NULL; } static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_sub_if_data *vlan; struct ieee80211_local *local = sdata->local; struct beacon_data *old_beacon; struct probe_resp *old_probe_resp; struct fils_discovery_data *old_fils_discovery; struct unsol_bcast_probe_resp_data *old_unsol_bcast_probe_resp; struct cfg80211_chan_def chandef; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; LIST_HEAD(keys); lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); if (!old_beacon) return -ENOENT; old_probe_resp = sdata_dereference(link->u.ap.probe_resp, sdata); old_fils_discovery = sdata_dereference(link->u.ap.fils_discovery, sdata); old_unsol_bcast_probe_resp = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->csa_blocked_tx = false; } ieee80211_free_next_beacon(link); /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_off(vlan->dev); if (ieee80211_num_beaconing_links(sdata) <= 1) netif_carrier_off(dev); /* remove beacon and probe response */ sdata->u.ap.active = false; RCU_INIT_POINTER(link->u.ap.beacon, NULL); RCU_INIT_POINTER(link->u.ap.probe_resp, NULL); RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); kfree_rcu(old_beacon, rcu_head); if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); if (old_fils_discovery) kfree_rcu(old_fils_discovery, rcu_head); if (old_unsol_bcast_probe_resp) kfree_rcu(old_unsol_bcast_probe_resp, rcu_head); kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; __sta_info_flush(sdata, true, link_id); 
ieee80211_remove_link_keys(link, &keys); if (!list_empty(&keys)) { synchronize_net(); ieee80211_free_key_list(local, &keys); } link_conf->enable_beacon = false; sdata->beacon_rate_set = false; sdata->vif.cfg.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.cac_started) { chandef = link_conf->chanreq.oper; wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } drv_stop_ap(sdata->local, sdata, link_conf); /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); ieee80211_link_copy_chanctx_to_vlans(link, true); ieee80211_link_release_channel(link); return 0; } static int sta_apply_auth_flags(struct ieee80211_local *local, struct sta_info *sta, u32 mask, u32 set) { int ret; if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && set & BIT(NL80211_STA_FLAG_ASSOCIATED) && !test_sta_flag(sta, WLAN_STA_ASSOC)) { /* * When peer becomes associated, init rate control as * well. Some drivers require rate control initialized * before drv_sta_state() is called. */ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(sta); ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); else ret = 0; if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && test_sta_flag(sta, WLAN_STA_ASSOC)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_NONE); if (ret) return ret; } return 0; } static void sta_apply_mesh_params(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { #ifdef CONFIG_MAC80211_MESH struct ieee80211_sub_if_data *sdata = sta->sdata; u64 changed = 0; if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) changed = mesh_plink_inc_estab_count(sdata); sta->mesh->plink_state = params->plink_state; sta->mesh->aid = params->peer_aid; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, sdata->u.mesh.mshcfg.power_mode); ewma_mesh_tx_rate_avg_init(&sta->mesh->tx_rate_avg); /* init at low value */ ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, 10); break; case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: case NL80211_PLINK_OPN_SNT: case NL80211_PLINK_OPN_RCVD: case NL80211_PLINK_CNF_RCVD: case NL80211_PLINK_HOLDING: if (sta->mesh->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->mesh->plink_state = params->plink_state; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, 
NL80211_MESH_POWER_UNKNOWN); break; default: /* nothing */ break; } } switch (params->plink_action) { case NL80211_PLINK_ACTION_NO_ACTION: /* nothing */ break; case NL80211_PLINK_ACTION_OPEN: changed |= mesh_plink_open(sta); break; case NL80211_PLINK_ACTION_BLOCK: changed |= mesh_plink_block(sta); break; } if (params->local_pm) changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); ieee80211_mbss_info_change_notify(sdata, changed); #endif } static int sta_link_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, bool new_link, struct link_station_parameters *params) { int ret = 0; struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 0 : params->link_id; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); /* * If there are no changes, then accept a link that exist, * unless it's a new link. */ if (params->link_id >= 0 && !new_link && !params->link_mac && !params->txpwr_set && !params->supported_rates_len && !params->ht_capa && !params->vht_capa && !params->he_capa && !params->eht_capa && !params->opmode_notif_used) return 0; if (!link || !link_sta) return -EINVAL; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->link_mac) { if (new_link) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN); } else if (!ether_addr_equal(link_sta->addr, params->link_mac)) { return -EINVAL; } } else if (new_link) { return -EINVAL; } if (params->txpwr_set) { link_sta->pub->txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) link_sta->pub->txpwr.power = params->txpwr.power; ret = drv_sta_set_txpwr(local, sdata, sta); if (ret) return ret; } if (params->supported_rates && params->supported_rates_len) { ieee80211_parse_bitrates(link->conf->chanreq.oper.width, sband, params->supported_rates, params->supported_rates_len, &link_sta->pub->supp_rates[sband->band]); } if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, params->vht_capa, NULL, link_sta); if (params->he_capa) ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, (void *)params->he_capa, params->he_capa_len, (void *)params->he_6ghz_capa, link_sta); if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, params->eht_capa, params->eht_capa_len, link_sta); if (params->opmode_notif_used) { /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ __ieee80211_vht_handle_opmode(sdata, link_sta, params->opmode_notif, sband->band); } ieee80211_sta_init_nss(link_sta); return ret; } static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 mask, set; int ret = 0; mask = params->sta_flags_mask; set = params->sta_flags_set; if (ieee80211_vif_is_mesh(&sdata->vif)) { /* * In mesh mode, ASSOCIATED isn't part of the nl80211 * API but must follow AUTHENTICATED for driver state. 
*/ if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { /* * TDLS -- everything follows authorized, but * only becoming authorized is possible, not * going back */ if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); } } if (mask & BIT(NL80211_STA_FLAG_WME) && local->hw.queues >= IEEE80211_NUM_ACS) sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); /* auth flags will be set later for TDLS, * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) set_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); else clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } if (mask & BIT(NL80211_STA_FLAG_MFP)) { sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else clear_sta_flag(sta, WLAN_STA_MFP); } if (mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) { if (set & BIT(NL80211_STA_FLAG_TDLS_PEER)) set_sta_flag(sta, WLAN_STA_TDLS_PEER); else clear_sta_flag(sta, WLAN_STA_TDLS_PEER); } if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU); /* mark TDLS channel switch support, if the AP allows it */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->deflink.u.mgd.tdls_chan_switch_prohibited && params->ext_capab_len >= 4 && params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH) set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->u.mgd.tdls_wider_bw_prohibited && ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && params->ext_capab_len >= 8 && params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) set_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW); if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { sta->sta.uapsd_queues = params->uapsd_queues; sta->sta.max_sp = params->max_sp; } ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab, params->ext_capab_len); /* * cfg80211 validates this (1-2007) and allows setting the AID * only when creating a new station entry */ if (params->aid) sta->sta.aid = params->aid; /* * Some of the following updates would be racy if called on an * existing station, via ieee80211_change_station(). However, * all such changes are rejected by cfg80211 except for updates * changing the supported rates on an existing but not yet used * TDLS peer. 
*/ if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; ret = sta_link_apply_parameters(local, sta, false, &params->link_sta_params); if (ret) return ret; if (params->support_p2p_ps >= 0) sta->sta.support_p2p_ps = params->support_p2p_ps; if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); if (params->airtime_weight) sta->airtime_weight = params->airtime_weight; /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } /* Mark the STA as MLO if MLD MAC address is available */ if (params->link_sta_params.mld_mac) sta->sta.mlo = true; return 0; } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; lockdep_assert_wiphy(local->hw.wiphy); if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; if (!is_valid_ether_addr(mac)) return -EINVAL; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && sdata->vif.type == NL80211_IFTYPE_STATION && !sdata->u.mgd.associated) return -EINVAL; /* * If we have a link ID, it can be a non-MLO station on an AP MLD, * but we need to have a link_mac in that case as well, so use the * STA's MAC address in that case. */ if (params->link_sta_params.link_id >= 0) sta = sta_info_alloc_with_link(sdata, mac, params->link_sta_params.link_id, params->link_sta_params.link_mac ?: mac, GFP_KERNEL); else sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; /* Though the mutex is not needed here (since the station is not * visible yet), sta_apply_parameters (and inner functions) require * the mutex due to other paths. 
*/ err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); return err; } /* * for TDLS and for unassociated station, rate control should be * initialized only when rates are known and station is marked * authorized/associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); return sta_info_insert(sta); } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); sta_info_flush(sdata, params->link_id); return 0; } static int ieee80211_change_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; enum cfg80211_station_type statype; int err; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (!sta) return -ENOENT; switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: if (sdata->u.mesh.user_mpm) statype = CFG80211_STA_MESH_PEER_USER; else statype = CFG80211_STA_MESH_PEER_KERNEL; break; case NL80211_IFTYPE_ADHOC: statype = CFG80211_STA_IBSS; break; case NL80211_IFTYPE_STATION: if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { statype = CFG80211_STA_AP_STA; break; } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) statype = CFG80211_STA_TDLS_PEER_ACTIVE; else statype = CFG80211_STA_TDLS_PEER_SETUP; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: if (test_sta_flag(sta, WLAN_STA_ASSOC)) statype = CFG80211_STA_AP_CLIENT; else statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: return -EOPNOTSUPP; } err = cfg80211_check_station_change(wiphy, params, statype); if (err) return err; if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) return -EBUSY; rcu_assign_pointer(vlansdata->u.vlan.sta, sta); __ieee80211_check_fast_rx_iface(vlansdata); drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); } } err = sta_apply_parameters(local, sta, params); if (err) return err; if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); } return 0; } #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_add(sdata, dst); if (IS_ERR(mpath)) { rcu_read_unlock(); return PTR_ERR(mpath); } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } 
static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; } static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { struct sta_info *next_hop_sta = rcu_dereference(mpath->next_hop); if (next_hop_sta) memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN); else eth_zero_addr(next_hop); memset(pinfo, 0, sizeof(*pinfo)); pinfo->generation = mpath->sdata->u.mesh.mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_SN | MPATH_INFO_METRIC | MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | MPATH_INFO_DISCOVERY_RETRIES | MPATH_INFO_FLAGS | MPATH_INFO_HOP_COUNT | MPATH_INFO_PATH_CHANGE; pinfo->frame_qlen = mpath->frame_queue.qlen; pinfo->sn = mpath->sn; pinfo->metric = mpath->metric; if (time_before(jiffies, mpath->exp_time)) pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies); pinfo->discovery_timeout = jiffies_to_msecs(mpath->discovery_timeout); pinfo->discovery_retries = mpath->discovery_retries; if (mpath->flags & MESH_PATH_ACTIVE) pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; if (mpath->flags & MESH_PATH_SN_VALID) pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVED) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; pinfo->hop_count = mpath->hop_count; pinfo->path_change_count = mpath->path_change_count; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static void mpp_set_pinfo(struct mesh_path *mpath, u8 *mpp, struct mpath_info *pinfo) { memset(pinfo, 0, sizeof(*pinfo)); memcpy(mpp, mpath->mpp, ETH_ALEN); pinfo->generation = mpath->sdata->u.mesh.mpp_paths_generation; } static int ieee80211_get_mpp(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup(sdata, dst); 
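	/* no mesh proxy path entry for this destination */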
if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpp(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_get_mesh_config(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); return 0; } static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) { return (mask >> (parm-1)) & 0x1; } static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); int i; /* allocate information elements */ new_ie = NULL; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, GFP_KERNEL); if (!new_ie) return -ENOMEM; } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_sp_id = setup->sync_method; ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; if (setup->is_secure) ifmsh->security |= IEEE80211_MESH_SEC_SECURED; /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(sdata->local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = setup->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } return 0; } static int ieee80211_update_mesh_config(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf) { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_mesh *ifmsh; sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifmsh = &sdata->u.mesh; /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask)) conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask)) conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask)) conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask)) conf->dot11MeshMaxRetries = 
nconf->dot11MeshMaxRetries; if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) conf->dot11MeshTTL = nconf->dot11MeshTTL; if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) conf->element_ttl = nconf->element_ttl; if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) { if (ifmsh->user_mpm) return -EBUSY; conf->auto_open_plinks = nconf->auto_open_plinks; } if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) conf->dot11MeshNbrOffsetMaxNeighbor = nconf->dot11MeshNbrOffsetMaxNeighbor; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) conf->dot11MeshHWMPmaxPREQretries = nconf->dot11MeshHWMPmaxPREQretries; if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask)) conf->path_refresh_time = nconf->path_refresh_time; if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask)) conf->min_discovery_timeout = nconf->min_discovery_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathTimeout = nconf->dot11MeshHWMPactivePathTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) conf->dot11MeshHWMPpreqMinInterval = nconf->dot11MeshHWMPpreqMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask)) conf->dot11MeshHWMPperrMinInterval = nconf->dot11MeshHWMPperrMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = nconf->dot11MeshHWMPnetDiameterTraversalTime; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) { conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; ieee80211_mesh_root_setup(ifmsh); } if (_chg_mesh_attr(NL80211_MESHCONF_GATE_ANNOUNCEMENTS, mask)) { /* our current gate announcement implementation rides on root * announcements, so require this ifmsh to also be a root node * */ if (nconf->dot11MeshGateAnnouncementProtocol && !(conf->dot11MeshHWMPRootMode > IEEE80211_ROOTMODE_ROOT)) { conf->dot11MeshHWMPRootMode = IEEE80211_PROACTIVE_RANN; ieee80211_mesh_root_setup(ifmsh); } conf->dot11MeshGateAnnouncementProtocol = nconf->dot11MeshGateAnnouncementProtocol; } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) conf->dot11MeshHWMPRannInterval = nconf->dot11MeshHWMPRannInterval; if (_chg_mesh_attr(NL80211_MESHCONF_FORWARDING, mask)) conf->dot11MeshForwarding = nconf->dot11MeshForwarding; if (_chg_mesh_attr(NL80211_MESHCONF_RSSI_THRESHOLD, mask)) { /* our RSSI threshold implementation is supported only for * devices that report signal in dBm. 
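		 * (That is what the SIGNAL_DBM hardware flag check below
		 * verifies.)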
*/ if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM)) return -EOPNOTSUPP; conf->rssi_threshold = nconf->rssi_threshold; } if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) { conf->ht_opmode = nconf->ht_opmode; sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_HT); } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathToRootTimeout = nconf->dot11MeshHWMPactivePathToRootTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) conf->dot11MeshHWMProotInterval = nconf->dot11MeshHWMProotInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { conf->power_mode = nconf->power_mode; ieee80211_mps_local_status_update(sdata); } if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) conf->plink_timeout = nconf->plink_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask)) conf->dot11MeshConnectedToMeshGate = nconf->dot11MeshConnectedToMeshGate; if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask)) conf->dot11MeshNolearn = nconf->dot11MeshNolearn; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_AS, mask)) conf->dot11MeshConnectedToAuthServer = nconf->dot11MeshConnectedToAuthServer; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; lockdep_assert_wiphy(sdata->local->hw.wiphy); memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); err = copy_mesh_setup(ifmsh, setup); if (err) return err; sdata->control_port_over_nl80211 = setup->control_port_over_nl80211; /* can mesh use other SMPS modes? 
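	 * For now SMPS is simply turned off and all available RX chains
	 * are used.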
*/ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_stop_mesh(sdata); ieee80211_link_release_channel(&sdata->deflink); kfree(sdata->u.mesh.ie); return 0; } #endif static int ieee80211_change_bss(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_supported_band *sband; u64 changed = 0; link = ieee80211_link_or_deflink(sdata, params->link_id, true); if (IS_ERR(link)) return PTR_ERR(link); if (!sdata_dereference(link->u.ap.beacon, sdata)) return -ENOENT; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->basic_rates) { if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width, wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, &link->conf->basic_rates)) return -EINVAL; changed |= BSS_CHANGED_BASIC_RATES; ieee80211_check_rate_mask(link); } if (params->use_cts_prot >= 0) { link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { link->conf->use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } if (!link->conf->use_short_slot && (sband->band == NL80211_BAND_5GHZ || sband->band == NL80211_BAND_6GHZ)) { link->conf->use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } if (params->use_short_slot_time >= 0) { link->conf->use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } if (params->ap_isolate >= 0) { if (params->ap_isolate) sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { link->conf->ht_operation_mode = (u16)params->ht_opmode; changed |= BSS_CHANGED_HT; } if (params->p2p_ctwindow >= 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; link->conf->p2p_noa_attr.oppps_ctwindow |= params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } if (params->p2p_opp_ps > 0) { link->conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } else if (params->p2p_opp_ps == 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static int ieee80211_set_txq_params(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, params->link_id, true); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) return -EOPNOTSUPP; if (local->hw.queues < IEEE80211_NUM_ACS) return -EOPNOTSUPP; if (IS_ERR(link)) return PTR_ERR(link); memset(&p, 0, sizeof(p)); p.aifs = params->aifs; p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; /* * Setting tx queue params disables u-apsd because it's only * called 
in master mode. */ p.uapsd = false; ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac); link->tx_conf[params->ac] = p; if (drv_conf_tx(local, link, params->ac, &p)) { wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for AC %d\n", params->ac); return -EINVAL; } ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_QOS); return 0; } #ifdef CONFIG_PM static int ieee80211_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wowlan) { return __ieee80211_suspend(wiphy_priv(wiphy), wowlan); } static int ieee80211_resume(struct wiphy *wiphy) { return __ieee80211_resume(wiphy_priv(wiphy)); } #else #define ieee80211_suspend NULL #define ieee80211_resume NULL #endif static int ieee80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *req) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(req->wdev); switch (ieee80211_vif_type_p2p(&sdata->vif)) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) break; /* * FIXME: implement NoA while scanning in software, * for now fall through to allow scanning only when * beaconing hasn't been configured yet */ fallthrough; case NL80211_IFTYPE_AP: /* * If the scan has been forced (and the driver supports * forcing), don't care about being beaconing already. * This will create problems to the attached stations (e.g. all * the frames sent while scanning on other channel will be * lost) */ if (sdata->deflink.u.ap.beacon && (!(wiphy->features & NL80211_FEATURE_AP_SCAN) || !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } return ieee80211_request_scan(sdata, req); } static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_scan_cancel(wiphy_priv(wiphy)); } static int ieee80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_sched_scan_request *req) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!sdata->local->ops->sched_scan_start) return -EOPNOTSUPP; return ieee80211_request_sched_scan_start(sdata, req); } static int ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev, u64 reqid) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->sched_scan_stop) return -EOPNOTSUPP; return ieee80211_request_sched_scan_stop(local); } static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req) { return ieee80211_mgd_auth(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req) { return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_deauth_request *req) { return ieee80211_mgd_deauth(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req) { return ieee80211_mgd_disassoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params) { return ieee80211_ibss_join(IEEE80211_DEV_TO_SUB_IF(dev), params); } static int ieee80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ibss_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int 
ieee80211_join_ocb(struct wiphy *wiphy, struct net_device *dev, struct ocb_setup *setup) { return ieee80211_ocb_join(IEEE80211_DEV_TO_SUB_IF(dev), setup); } static int ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ocb_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(int) * NUM_NL80211_BANDS); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MCAST_RATE); return 0; } static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) { struct ieee80211_local *local = wiphy_priv(wiphy); int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { ieee80211_check_fast_xmit_all(local); err = drv_set_frag_threshold(local, wiphy->frag_threshold); if (err) { ieee80211_check_fast_xmit_all(local); return err; } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || (changed & WIPHY_PARAM_DYN_ACK)) { s16 coverage_class; coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ? wiphy->coverage_class : -1; err = drv_set_coverage_class(local, coverage_class); if (err) return err; } if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); if (err) return err; } if (changed & WIPHY_PARAM_RETRY_SHORT) { if (wiphy->retry_short > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; } if (changed & WIPHY_PARAM_RETRY_LONG) { if (wiphy->retry_long > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; } if (changed & (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); if (changed & (WIPHY_PARAM_TXQ_LIMIT | WIPHY_PARAM_TXQ_MEMORY_LIMIT | WIPHY_PARAM_TXQ_QUANTUM)) ieee80211_txq_set_params(local); return 0; } static int ieee80211_set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; enum nl80211_tx_power_setting txp_type = type; bool update_txp_type = false; bool has_monitor = false; lockdep_assert_wiphy(local->hw.wiphy); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) return -EOPNOTSUPP; } switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->deflink.user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; sdata->deflink.user_power_level = MBM_TO_DBM(mbm); break; } if (txp_type != sdata->vif.bss_conf.txpower_type) { update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; local->user_power_level = MBM_TO_DBM(mbm); break; } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { has_monitor = true; continue; } sdata->deflink.user_power_level = local->user_power_level; 
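		/* also pick up a changed TX power type for this interface */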
if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; ieee80211_recalc_txpower(sdata, update_txp_type); } if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; ieee80211_recalc_txpower(sdata, update_txp_type); } } return 0; } static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (local->ops->get_txpower) return drv_get_txpower(local, sdata, dbm); if (local->emulate_chanctx) *dbm = local->hw.conf.power_level; else *dbm = sdata->vif.bss_conf.txpower; /* INT_MIN indicates no power level was set yet */ if (*dbm == INT_MIN) return -EINVAL; return 0; } static void ieee80211_rfkill_poll(struct wiphy *wiphy) { struct ieee80211_local *local = wiphy_priv(wiphy); drv_rfkill_poll(local); } #ifdef CONFIG_NL80211_TESTMODE static int ieee80211_testmode_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_vif *vif = NULL; if (!local->ops->testmode_cmd) return -EOPNOTSUPP; if (wdev) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) vif = &sdata->vif; } return local->ops->testmode_cmd(&local->hw, vif, data, len); } static int ieee80211_testmode_dump(struct wiphy *wiphy, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->testmode_dump) return -EOPNOTSUPP; return local->ops->testmode_dump(&local->hw, skb, cb, data, len); } #endif int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { const u8 *ap; enum ieee80211_smps_mode old_req; int err; struct sta_info *sta; bool tdls_peer_found = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return 0; old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; /* The driver indicated that EML is enabled for the interface, which * implies that SMPS flows towards the AP should be stopped. */ if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) return 0; if (old_req == smps_mode && smps_mode != IEEE80211_SMPS_AUTOMATIC) return 0; /* * If not associated, or current association is not an HT * association, there's no need to do anything, just store * the new value until we associate. 
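	 * (ieee80211_set_power_mgmt() below replays the stored request
	 * for each link.)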
*/ if (!sdata->u.mgd.associated || link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; ap = sdata->vif.cfg.ap_addr; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) continue; tdls_peer_found = true; break; } rcu_read_unlock(); if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { if (tdls_peer_found || !sdata->u.mgd.powersave) smps_mode = IEEE80211_SMPS_OFF; else smps_mode = IEEE80211_SMPS_DYNAMIC; } /* send SM PS frame to AP */ err = ieee80211_send_smps_action(sdata, smps_mode, ap, ap, ieee80211_vif_is_mld(&sdata->vif) ? link->link_id : -1); if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) ieee80211_teardown_tdls_peers(link); return err; } static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); unsigned int link_id; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && timeout == local->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; __ieee80211_request_smps_mgd(sdata, link, link->u.mgd.req_smps); } if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); ieee80211_check_fast_rx_iface(sdata); return 0; } static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, s32 rssi_thold, u32 rssi_hyst, s32 rssi_low, s32 rssi_high) { struct ieee80211_bss_conf *conf; if (!link || !link->conf) return; conf = link->conf; if (rssi_thold && rssi_hyst && rssi_thold == conf->cqm_rssi_thold && rssi_hyst == conf->cqm_rssi_hyst) return; conf->cqm_rssi_thold = rssi_thold; conf->cqm_rssi_hyst = rssi_hyst; conf->cqm_rssi_low = rssi_low; conf->cqm_rssi_high = rssi_high; link->u.mgd.last_cqm_event_signal = 0; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return; if (sdata->u.mgd.associated && (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM); } static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER && !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst, 0, 0); } return 0; } static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_low, s32 rssi_high) { struct ieee80211_sub_if_data *sdata = 
IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, 0, 0, rssi_low, rssi_high); } return 0; } static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i, ret; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able * to send something, and if we're an AP we have to be able to do * so at a basic rate so that all clients can receive it. */ if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) && sdata->vif.bss_conf.chanreq.oper.chan) { u32 basic_rates = sdata->vif.bss_conf.basic_rates; enum nl80211_band band; band = sdata->vif.bss_conf.chanreq.oper.chan->band; if (!(mask->control[band].legacy & basic_rates)) return -EINVAL; } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { ret = drv_set_bitrate_mask(local, sdata, mask); if (ret) return ret; } for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, sizeof(mask->control[i].ht_mcs)); memcpy(sdata->rc_rateidx_vht_mcs_mask[i], mask->control[i].vht_mcs, sizeof(mask->control[i].vht_mcs)); sdata->rc_has_mcs_mask[i] = false; sdata->rc_has_vht_mcs_mask[i] = false; if (!sband) continue; for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) { sdata->rc_has_mcs_mask[i] = true; break; } } for (j = 0; j < NL80211_VHT_NSS_MAX; j++) { if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) { sdata->rc_has_vht_mcs_mask[i] = true; break; } } } return 0; } static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = *chandef }; struct ieee80211_local *local = sdata->local; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) { err = -EBUSY; goto out_unlock; } /* whatever, but channel contexts should not complain about that one */ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = local->rx_chains; err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) goto out_unlock; wiphy_delayed_work_queue(wiphy, &sdata->deflink.dfs_cac_timer_work, msecs_to_jiffies(cac_time_ms)); out_unlock: return err; } static void ieee80211_end_cac(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { /* it might be waiting for the local->mtx, but then * by the time it gets it, sdata->wdev.cac_started * will no longer be true */ 
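		/* cancel any pending CAC work and release the channel
		 * if CAC had actually been started
		 */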
wiphy_delayed_work_cancel(wiphy, &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { ieee80211_link_release_channel(&sdata->deflink); sdata->wdev.cac_started = false; } } } static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { struct cfg80211_beacon_data *new_beacon; u8 *pos; int len; len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len + beacon->proberesp_ies_len + beacon->assocresp_ies_len + beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len; if (beacon->mbssid_ies) len += ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies, beacon->rnr_ies, beacon->mbssid_ies->cnt); new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL); if (!new_beacon) return NULL; if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { new_beacon->mbssid_ies = kzalloc(struct_size(new_beacon->mbssid_ies, elem, beacon->mbssid_ies->cnt), GFP_KERNEL); if (!new_beacon->mbssid_ies) { kfree(new_beacon); return NULL; } if (beacon->rnr_ies && beacon->rnr_ies->cnt) { new_beacon->rnr_ies = kzalloc(struct_size(new_beacon->rnr_ies, elem, beacon->rnr_ies->cnt), GFP_KERNEL); if (!new_beacon->rnr_ies) { kfree(new_beacon->mbssid_ies); kfree(new_beacon); return NULL; } } } pos = (u8 *)(new_beacon + 1); if (beacon->head_len) { new_beacon->head_len = beacon->head_len; new_beacon->head = pos; memcpy(pos, beacon->head, beacon->head_len); pos += beacon->head_len; } if (beacon->tail_len) { new_beacon->tail_len = beacon->tail_len; new_beacon->tail = pos; memcpy(pos, beacon->tail, beacon->tail_len); pos += beacon->tail_len; } if (beacon->beacon_ies_len) { new_beacon->beacon_ies_len = beacon->beacon_ies_len; new_beacon->beacon_ies = pos; memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len); pos += beacon->beacon_ies_len; } if (beacon->proberesp_ies_len) { new_beacon->proberesp_ies_len = beacon->proberesp_ies_len; new_beacon->proberesp_ies = pos; memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len); pos += beacon->proberesp_ies_len; } if (beacon->assocresp_ies_len) { new_beacon->assocresp_ies_len = beacon->assocresp_ies_len; new_beacon->assocresp_ies = pos; memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len); pos += beacon->assocresp_ies_len; } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { pos += ieee80211_copy_mbssid_beacon(pos, new_beacon->mbssid_ies, beacon->mbssid_ies); if (beacon->rnr_ies && beacon->rnr_ies->cnt) pos += ieee80211_copy_rnr_beacon(pos, new_beacon->rnr_ies, beacon->rnr_ies); } /* might copy -1, meaning no changes requested */ new_beacon->ftm_responder = beacon->ftm_responder; if (beacon->lci) { new_beacon->lci_len = beacon->lci_len; new_beacon->lci = pos; memcpy(pos, beacon->lci, beacon->lci_len); pos += beacon->lci_len; } if (beacon->civicloc) { new_beacon->civicloc_len = beacon->civicloc_len; new_beacon->civicloc = pos; memcpy(pos, beacon->civicloc, beacon->civicloc_len); pos += beacon->civicloc_len; } return new_beacon; } void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link_data = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link_data)) { rcu_read_unlock(); 
return; } /* TODO: MBSSID with MLO changes */ if (vif->mbssid_tx_vif == vif) { /* Trigger ieee80211_csa_finish() on the non-transmitting * interfaces when channel switch is received on * transmitting interface */ struct ieee80211_sub_if_data *iter; list_for_each_entry_rcu(iter, &local->interfaces, list) { if (!ieee80211_sdata_running(iter)) continue; if (iter == sdata || iter->vif.mbssid_tx_vif != vif) continue; wiphy_work_queue(iter->local->hw.wiphy, &iter->deflink.csa_finalize_work); } } wiphy_work_queue(local->hw.wiphy, &link_data->csa_finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_csa_finish); void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_tx) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; sdata->csa_blocked_tx = block_tx; sdata_info(sdata, "channel switch failed, disconnecting\n"); wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: if (!link_data->u.ap.next_beacon) return -EINVAL; err = ieee80211_assign_beacon(sdata, link_data, link_data->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link_data); if (err < 0) return err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata, changed); if (err < 0) return err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata, changed); if (err < 0) return err; break; #endif default: WARN_ON(1); return -EINVAL; } return 0; } static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf = link_data->conf; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); /* * using reservation isn't immediate as it may be deferred until later * with multi-vif. 
once reservation is complete it will re-schedule the * work with no reserved_chanctx so verify chandef to check if it * completed successfully */ if (link_data->reserved_chanctx) { /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their * reservations */ if (link_data->reserved_ready) return 0; return ieee80211_link_use_reserved_context(link_data); } if (!cfg80211_chandef_identical(&link_conf->chanreq.oper, &link_data->csa_chanreq.oper)) return -EINVAL; link_conf->csa_active = false; err = ieee80211_set_after_csa_beacon(link_data, &changed); if (err) return err; ieee80211_link_info_change_notify(sdata, link_data, changed); if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->csa_blocked_tx = false; } err = drv_post_channel_switch(link_data); if (err) return err; cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper, link_data->link_id); return 0; } static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; if (__ieee80211_csa_finalize(link_data)) { sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n", link_data->link_id); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); } } void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, csa_finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!link->conf->csa_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_csa_finalize(link); } static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data, struct cfg80211_csa_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_csa_settings csa = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link_data->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after); if (!link_data->u.ap.next_beacon) return -ENOMEM; /* * With a count of 0, we don't have to wait for any * TBTT before switching, so complete the CSA * immediately. In theory, with a count == 1 we * should delay the switch until just before the next * TBTT, but that would complicate things so we switch * immediately too. If we would delay the switch * until the next TBTT, we would have to set the probe * response here. * * TODO: A channel switch with count <= 1 without * sending a CSA action frame is kind of useless, * because the clients won't know we're changing * channels. The action frame must be implemented * either here or in the userspace. 
*/ if (params->count <= 1) break; if ((params->n_counter_offsets_beacon > IEEE80211_MAX_CNTDWN_COUNTERS_NUM) || (params->n_counter_offsets_presp > IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) { ieee80211_free_next_beacon(link_data); return -EINVAL; } csa.counter_offsets_beacon = params->counter_offsets_beacon; csa.counter_offsets_presp = params->counter_offsets_presp; csa.n_counter_offsets_beacon = params->n_counter_offsets_beacon; csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; err = ieee80211_assign_beacon(sdata, link_data, &params->beacon_csa, &csa, NULL, changed); if (err < 0) { ieee80211_free_next_beacon(link_data); return err; } break; case NL80211_IFTYPE_ADHOC: if (!sdata->vif.cfg.ibss_joined) return -EINVAL; if (params->chandef.width != sdata->u.ibss.chandef.width) return -EINVAL; switch (params->chandef.width) { case NL80211_CHAN_WIDTH_40: if (cfg80211_get_chandef_type(&params->chandef) != cfg80211_get_chandef_type(&sdata->u.ibss.chandef)) return -EINVAL; break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: break; default: return -EINVAL; } /* changes into another band are not supported */ if (sdata->u.ibss.chandef.chan->band != params->chandef.chan->band) return -EINVAL; /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_ibss_csa_beacon(sdata, params, changed); if (err < 0) return err; } ieee80211_send_action_csa(sdata, params); break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* changes into another band are not supported */ if (sdata->vif.bss_conf.chanreq.oper.chan->band != params->chandef.chan->band) return -EINVAL; if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_NONE) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_INIT; if (!ifmsh->pre_value) ifmsh->pre_value = 1; else ifmsh->pre_value++; } /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_mesh_csa_beacon(sdata, params, changed); if (err < 0) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; return err; } } if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) ieee80211_send_action_csa(sdata, params); break; } #endif default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata) { sdata->vif.bss_conf.color_change_active = false; ieee80211_free_next_beacon(&sdata->deflink); cfg80211_color_change_aborted_notify(sdata->dev); } static int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = params->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_channel_switch ch_switch = { .link_id = params->link_id, }; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link_data; u64 changed = 0; u8 link_id = params->link_id; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; if (sdata->wdev.cac_started) return -EBUSY; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link_data = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link_data) return -ENOLINK; link_conf = link_data->conf; if (chanreq.oper.punctured && !link_conf->eht_support) return -EINVAL; /* don't allow another 
channel switch if one is already active. */ if (link_conf->csa_active) return -EBUSY; conf = wiphy_dereference(wiphy, link_conf->chanctx_conf); if (!conf) { err = -EBUSY; goto out; } if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ err = -EOPNOTSUPP; goto out; } chanctx = container_of(conf, struct ieee80211_chanctx, conf); ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; ch_switch.block_tx = params->block_tx; ch_switch.chandef = chanreq.oper; ch_switch.count = params->count; err = drv_pre_channel_switch(sdata, &ch_switch); if (err) goto out; err = ieee80211_link_reserve_chanctx(link_data, &chanreq, chanctx->mode, params->radar_required); if (err) goto out; /* if reservation is invalid then this will fail */ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } /* if there is a color change in progress, abort it */ if (link_conf->color_change_active) ieee80211_color_change_abort(sdata); err = ieee80211_set_csa_beacon(link_data, params, &changed); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } link_data->csa_chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->csa_blocked_tx = true; } cfg80211_ch_switch_started_notify(sdata->dev, &link_data->csa_chanreq.oper, 0, params->count, params->block_tx); if (changed) { ieee80211_link_info_change_notify(sdata, link_data, changed); drv_channel_switch_beacon(sdata, &link_data->csa_chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ ieee80211_csa_finalize(link_data); } out: return err; } int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); return __ieee80211_channel_switch(wiphy, dev, params); } u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) { lockdep_assert_wiphy(local->hw.wiphy); local->roc_cookie_counter++; /* wow, you wrapped 64 bits ... 
more likely a bug */ if (WARN_ON(local->roc_cookie_counter == 0)) local->roc_cookie_counter++; return local->roc_cookie_counter; } int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; int id; ack_skb = skb_copy(skb, gfp); if (!ack_skb) return -ENOMEM; spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); if (id < 0) { kfree_skb(ack_skb); return -ENOMEM; } IEEE80211_SKB_CB(skb)->status_data_idr = 1; IEEE80211_SKB_CB(skb)->status_data = id; *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; return 0; } static void ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4); u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4); bool global_change, intf_change; global_change = (local->probe_req_reg != !!(upd->global_stypes & preq_mask)) || (local->rx_mcast_action_reg != !!(upd->global_mcast_stypes & action_mask)); local->probe_req_reg = upd->global_stypes & preq_mask; local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask; intf_change = (sdata->vif.probe_req_reg != !!(upd->interface_stypes & preq_mask)) || (sdata->vif.rx_mcast_action_reg != !!(upd->interface_mcast_stypes & action_mask)); sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask; sdata->vif.rx_mcast_action_reg = upd->interface_mcast_stypes & action_mask; if (!local->open_count) return; if (intf_change && ieee80211_sdata_running(sdata)) drv_config_iface_filter(local, sdata, sdata->vif.probe_req_reg ? 
FIF_PROBE_REQ : 0, FIF_PROBE_REQ); if (global_change) ieee80211_configure_filter(local); } static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); int ret; if (local->started) return -EOPNOTSUPP; ret = drv_set_antenna(local, tx_ant, rx_ant); if (ret) return ret; local->rx_chains = hweight8(rx_ant); return 0; } static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); return drv_get_antenna(local, tx_ant, rx_ant); } static int ieee80211_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!local->ops->set_rekey_data) return -EOPNOTSUPP; drv_set_rekey_data(local, sdata, data); return 0; } static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; int ret; /* the lock is needed to assign the cookie later */ lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); sta = sta_info_get_bss(sdata, peer); if (!sta) { ret = -ENOLINK; goto unlock; } qos = sta->sta.wme; chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { ret = -EINVAL; goto unlock; } band = chanctx_conf->def.chan->band; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) { ret = -ENOMEM; goto unlock; } skb->dev = dev; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_NL80211_FRAME_TX; info->band = band; skb_set_queue_mapping(skb, IEEE80211_AC_VO); skb->priority = 7; if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC); if (ret) { kfree_skb(skb); goto unlock; } local_bh_disable(); ieee80211_xmit(sdata, sta, skb); local_bh_enable(); ret = 0; unlock: rcu_read_unlock(); return ret; } static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_link_data *link; int ret = -ENODATA; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (!link) { ret = -ENOLINK; goto out; } chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == 
NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; ret = 0; } out: rcu_read_unlock(); return ret; } #ifdef CONFIG_PM static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) { drv_set_wakeup(wiphy_priv(wiphy), enabled); } #endif static int ieee80211_set_qos_map(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_qos_map *qos_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mac80211_qos_map *new_qos_map, *old_qos_map; if (qos_map) { new_qos_map = kzalloc(sizeof(*new_qos_map), GFP_KERNEL); if (!new_qos_map) return -ENOMEM; memcpy(&new_qos_map->qos_map, qos_map, sizeof(*qos_map)); } else { /* A NULL qos_map was passed to disable QoS mapping */ new_qos_map = NULL; } old_qos_map = sdata_dereference(sdata->qos_map, sdata); rcu_assign_pointer(sdata->qos_map, new_qos_map); if (old_qos_map) kfree_rcu(old_qos_map, rcu_head); return 0; } static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; u64 changed = 0; link = sdata_dereference(sdata->link[link_id], sdata); ret = ieee80211_link_change_chanreq(link, &chanreq, &changed); if (ret == 0) ieee80211_link_info_change_notify(sdata, link, changed); return ret; } static int ieee80211_add_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer, u8 up, u16 admitted_time) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ac = ieee802_1d_to_ac[up]; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(sdata->wmm_acm & BIT(up))) return -EINVAL; if (ifmgd->tx_tspec[ac].admitted_time) return -EBUSY; if (admitted_time) { ifmgd->tx_tspec[ac].admitted_time = 32 * admitted_time; ifmgd->tx_tspec[ac].tsid = tsid; ifmgd->tx_tspec[ac].up = up; } return 0; } static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = wiphy_priv(wiphy); int ac; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; /* skip unused entries */ if (!tx_tspec->admitted_time) continue; if (tx_tspec->tsid != tsid) continue; /* due to this new packets will be reassigned to non-ACM ACs */ tx_tspec->up = -1; /* Make sure that all packets have been sent to avoid to * restore the QoS params on packets that are still on the * queues. 
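		 * (synchronize_net() and the queue flush below ensure that.)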
*/ synchronize_net(); ieee80211_flush_queues(local, sdata, false); /* restore the normal QoS parameters * (unconditionally to avoid races) */ tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE; tx_tspec->downgraded = false; ieee80211_sta_handle_tspec_ac_params(sdata); /* finally clear all the data */ memset(tx_tspec, 0, sizeof(*tx_tspec)); return 0; } return -ENOENT; } void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, u8 inst_id, enum nl80211_nan_func_term_reason reason, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct cfg80211_nan_func *func; u64 cookie; if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); if (WARN_ON(!func)) { spin_unlock_bh(&sdata->u.nan.func_lock); return; } cookie = func->cookie; idr_remove(&sdata->u.nan.function_inst_ids, inst_id); spin_unlock_bh(&sdata->u.nan.func_lock); cfg80211_free_nan_func(func); cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, reason, cookie, gfp); } EXPORT_SYMBOL(ieee80211_nan_func_terminated); void ieee80211_nan_func_match(struct ieee80211_vif *vif, struct cfg80211_nan_match_params *match, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct cfg80211_nan_func *func; if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); if (WARN_ON(!func)) { spin_unlock_bh(&sdata->u.nan.func_lock); return; } match->cookie = func->cookie; spin_unlock_bh(&sdata->u.nan.func_lock); cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); } EXPORT_SYMBOL(ieee80211_nan_func_match); static int ieee80211_set_multicast_to_unicast(struct wiphy *wiphy, struct net_device *dev, const bool enabled) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->u.ap.multicast_to_unicast = enabled; return 0; } void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, struct txq_info *txqi) { if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_BYTES))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_BYTES); txqstats->backlog_bytes = txqi->tin.backlog_bytes; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS); txqstats->backlog_packets = txqi->tin.backlog_packets; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_FLOWS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_FLOWS); txqstats->flows = txqi->tin.flows; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_DROPS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_DROPS); txqstats->drops = txqi->cstats.drop_count; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_ECN_MARKS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_ECN_MARKS); txqstats->ecn_marks = txqi->cstats.ecn_mark; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_OVERLIMIT))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_OVERLIMIT); txqstats->overlimit = txqi->tin.overlimit; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_COLLISIONS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_COLLISIONS); txqstats->collisions = txqi->tin.collisions; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_BYTES))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_BYTES); txqstats->tx_bytes = txqi->tin.tx_bytes; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_PACKETS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_PACKETS); txqstats->tx_packets = txqi->tin.tx_packets; } } static int 
ieee80211_get_txq_stats(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_txq_stats *txqstats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; int ret = 0; spin_lock_bh(&local->fq.lock); rcu_read_lock(); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (!sdata->vif.txq) { ret = 1; goto out; } ieee80211_fill_txq_stats(txqstats, to_txq_info(sdata->vif.txq)); } else { /* phy stats */ txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS) | BIT(NL80211_TXQ_STATS_BACKLOG_BYTES) | BIT(NL80211_TXQ_STATS_OVERLIMIT) | BIT(NL80211_TXQ_STATS_OVERMEMORY) | BIT(NL80211_TXQ_STATS_COLLISIONS) | BIT(NL80211_TXQ_STATS_MAX_FLOWS); txqstats->backlog_packets = local->fq.backlog; txqstats->backlog_bytes = local->fq.memory_usage; txqstats->overlimit = local->fq.overlimit; txqstats->overmemory = local->fq.overmemory; txqstats->collisions = local->fq.collisions; txqstats->max_flows = local->fq.flows_cnt; } out: rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); return ret; } static int ieee80211_get_ftm_responder_stats(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); return drv_get_ftm_responder_stats(local, sdata, ftm_stats); } static int ieee80211_start_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_start_pmsr(local, sdata, request); } static void ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_abort_pmsr(local, sdata, request); } static int ieee80211_set_tid_config(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_tid_config *tid_conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->set_tid_config) return -EOPNOTSUPP; if (!tid_conf->peer) return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); sta = sta_info_get_bss(sdata, tid_conf->peer); if (!sta) return -ENOENT; return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); } static int ieee80211_reset_tid_config(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 tids) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->reset_tid_config) return -EOPNOTSUPP; if (!peer) return drv_reset_tid_config(sdata->local, sdata, NULL, tids); sta = sta_info_get_bss(sdata, peer); if (!sta) return -ENOENT; return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); } static int ieee80211_set_sar_specs(struct wiphy *wiphy, struct cfg80211_sar_specs *sar) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_sar_specs) return -EOPNOTSUPP; return local->ops->set_sar_specs(&local->hw, sar); } static int ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, u64 *changed) { switch (sdata->vif.type) { case NL80211_IFTYPE_AP: { int ret; if (!sdata->deflink.u.ap.next_beacon) return -EINVAL; ret = ieee80211_assign_beacon(sdata, &sdata->deflink, sdata->deflink.u.ap.next_beacon, NULL, 
NULL, changed); ieee80211_free_next_beacon(&sdata->deflink); if (ret < 0) return ret; break; } default: WARN_ON_ONCE(1); return -EINVAL; } return 0; } static int ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_color_change_settings *params, u64 *changed) { struct ieee80211_color_change_settings color_change = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: sdata->deflink.u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_next); if (!sdata->deflink.u.ap.next_beacon) return -ENOMEM; if (params->count <= 1) break; color_change.counter_offset_beacon = params->counter_offset_beacon; color_change.counter_offset_presp = params->counter_offset_presp; color_change.count = params->count; err = ieee80211_assign_beacon(sdata, &sdata->deflink, &params->beacon_color_change, NULL, &color_change, changed); if (err < 0) { ieee80211_free_next_beacon(&sdata->deflink); return err; } break; default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, u8 color, int enable, u64 changed) { lockdep_assert_wiphy(sdata->local->hw.wiphy); sdata->vif.bss_conf.he_bss_color.color = color; sdata->vif.bss_conf.he_bss_color.enabled = enable; changed |= BSS_CHANGED_HE_BSS_COLOR; ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { struct ieee80211_sub_if_data *child; list_for_each_entry(child, &sdata->local->interfaces, list) { if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { child->vif.bss_conf.he_bss_color.color = color; child->vif.bss_conf.he_bss_color.enabled = enable; ieee80211_link_info_change_notify(child, &child->deflink, BSS_CHANGED_HE_BSS_COLOR); } } } } static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); sdata->vif.bss_conf.color_change_active = false; err = ieee80211_set_after_color_change_beacon(sdata, &changed); if (err) { cfg80211_color_change_aborted_notify(sdata->dev); return err; } ieee80211_color_change_bss_config_notify(sdata, sdata->vif.bss_conf.color_change_color, 1, changed); cfg80211_color_change_notify(sdata->dev); return 0; } void ieee80211_color_change_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, deflink.color_change_finalize_work); struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ if (!sdata->vif.bss_conf.color_change_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_color_change_finalize(sdata); } void ieee80211_color_collision_detection_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct ieee80211_link_data *link = container_of(delayed_work, struct ieee80211_link_data, color_collision_detect_work); struct ieee80211_sub_if_data *sdata = link->sdata; cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap); } void ieee80211_color_change_finish(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->deflink.color_change_finalize_work); } EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, u64 color_bitmap) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link = &sdata->deflink; if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) return; if (delayed_work_pending(&link->color_collision_detect_work)) return; link->color_bitmap = color_bitmap; /* queue the color collision detection event every 500 ms in order to * avoid sending too much netlink messages to userspace. */ ieee80211_queue_delayed_work(&sdata->local->hw, &link->color_collision_detect_work, msecs_to_jiffies(500)); } EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify); static int ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); if (sdata->vif.bss_conf.nontransmitted) return -EINVAL; /* don't allow another color change if one is already active or if csa * is active */ if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) { err = -EBUSY; goto out; } err = ieee80211_set_color_change_beacon(sdata, params, &changed); if (err) goto out; sdata->vif.bss_conf.color_change_active = true; sdata->vif.bss_conf.color_change_color = params->color; cfg80211_color_change_started_notify(sdata->dev, params->count); if (changed) ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed); else /* if the beacon didn't change, we can finalize immediately */ ieee80211_color_change_finalize(sdata); out: return err; } static int ieee80211_set_radar_background(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_radar_background) return -EOPNOTSUPP; return local->ops->set_radar_background(&local->hw, chandef); } static int ieee80211_add_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); if (wdev->use_4addr) return -EOPNOTSUPP; return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static void ieee80211_del_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static int sta_add_link_station(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct link_station_parameters *params) { struct sta_info *sta; int ret; sta = 
sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!sta->sta.valid_links) return -EINVAL; if (sta->sta.valid_links & BIT(params->link_id)) return -EALREADY; ret = ieee80211_sta_allocate_link(sta, params->link_id); if (ret) return ret; ret = sta_link_apply_parameters(local, sta, true, params); if (ret) { ieee80211_sta_free_link(sta, params->link_id); return ret; } /* ieee80211_sta_activate_link frees the link upon failure */ return ieee80211_sta_activate_link(sta, params->link_id); } static int ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); lockdep_assert_wiphy(sdata->local->hw.wiphy); return sta_add_link_station(local, sdata, params); } static int sta_mod_link_station(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct link_station_parameters *params) { struct sta_info *sta; sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; return sta_link_apply_parameters(local, sta, false, params); } static int ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); lockdep_assert_wiphy(sdata->local->hw.wiphy); return sta_mod_link_station(local, sdata, params); } static int sta_del_link_station(struct ieee80211_sub_if_data *sdata, struct link_station_del_parameters *params) { struct sta_info *sta; sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; /* must not create a STA without links */ if (sta->sta.valid_links == BIT(params->link_id)) return -EINVAL; ieee80211_sta_remove_link(sta, params->link_id); return 0; } static int ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); return sta_del_link_station(sdata, params); } static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; if (!local->ops->set_hw_timestamp) return -EOPNOTSUPP; if (!check_sdata_in_driver(sdata)) return -EIO; return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); } static int ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); return ieee80211_req_neg_ttlm(sdata, params); } const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, .change_virtual_intf = ieee80211_change_iface, .start_p2p_device = ieee80211_start_p2p_device, .stop_p2p_device = ieee80211_stop_p2p_device, .add_key = ieee80211_add_key, .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, .set_default_mgmt_key = ieee80211_config_default_mgmt_key, .set_default_beacon_key = ieee80211_config_default_beacon_key, .start_ap = ieee80211_start_ap, 
.change_beacon = ieee80211_change_beacon, .stop_ap = ieee80211_stop_ap, .add_station = ieee80211_add_station, .del_station = ieee80211_del_station, .change_station = ieee80211_change_station, .get_station = ieee80211_get_station, .dump_station = ieee80211_dump_station, .dump_survey = ieee80211_dump_survey, #ifdef CONFIG_MAC80211_MESH .add_mpath = ieee80211_add_mpath, .del_mpath = ieee80211_del_mpath, .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, .get_mpp = ieee80211_get_mpp, .dump_mpp = ieee80211_dump_mpp, .update_mesh_config = ieee80211_update_mesh_config, .get_mesh_config = ieee80211_get_mesh_config, .join_mesh = ieee80211_join_mesh, .leave_mesh = ieee80211_leave_mesh, #endif .join_ocb = ieee80211_join_ocb, .leave_ocb = ieee80211_leave_ocb, .change_bss = ieee80211_change_bss, .inform_bss = ieee80211_inform_bss, .set_txq_params = ieee80211_set_txq_params, .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, .abort_scan = ieee80211_abort_scan, .sched_scan_start = ieee80211_sched_scan_start, .sched_scan_stop = ieee80211_sched_scan_stop, .auth = ieee80211_auth, .assoc = ieee80211_assoc, .deauth = ieee80211_deauth, .disassoc = ieee80211_disassoc, .join_ibss = ieee80211_join_ibss, .leave_ibss = ieee80211_leave_ibss, .set_mcast_rate = ieee80211_set_mcast_rate, .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) CFG80211_TESTMODE_DUMP(ieee80211_testmode_dump) .set_power_mgmt = ieee80211_set_power_mgmt, .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .mgmt_tx = ieee80211_mgmt_tx, .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config, .update_mgmt_frame_registrations = ieee80211_update_mgmt_frame_registrations, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, .set_rekey_data = ieee80211_set_rekey_data, .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, .tdls_channel_switch = ieee80211_tdls_channel_switch, .tdls_cancel_channel_switch = ieee80211_tdls_cancel_channel_switch, .probe_client = ieee80211_probe_client, .set_noack_map = ieee80211_set_noack_map, #ifdef CONFIG_PM .set_wakeup = ieee80211_set_wakeup, #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, .nan_change_conf = ieee80211_nan_change_conf, .add_nan_func = ieee80211_add_nan_func, .del_nan_func = ieee80211_del_nan_func, .set_multicast_to_unicast = ieee80211_set_multicast_to_unicast, .tx_control_port = ieee80211_tx_control_port, .get_txq_stats = ieee80211_get_txq_stats, .get_ftm_responder_stats = ieee80211_get_ftm_responder_stats, .start_pmsr = ieee80211_start_pmsr, .abort_pmsr = ieee80211_abort_pmsr, .probe_mesh_link = ieee80211_probe_mesh_link, .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = 
ieee80211_reset_tid_config,
	.set_sar_specs = ieee80211_set_sar_specs,
	.color_change = ieee80211_color_change,
	.set_radar_background = ieee80211_set_radar_background,
	.add_intf_link = ieee80211_add_intf_link,
	.del_intf_link = ieee80211_del_intf_link,
	.add_link_station = ieee80211_add_link_station,
	.mod_link_station = ieee80211_mod_link_station,
	.del_link_station = ieee80211_del_link_station,
	.set_hw_timestamp = ieee80211_set_hw_timestamp,
	.set_ttlm = ieee80211_set_ttlm,
};
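/*
 * Illustrative sketch (not part of the mac80211 code above): how a
 * cfg80211_ops table such as mac80211_config_ops ends up attached to a
 * wiphy, which is what makes nl80211 dispatch requests to these callbacks.
 * mac80211 does this internally when a driver allocates its hardware; the
 * my_priv/my_ops/my_alloc_wiphy names below are hypothetical and exist
 * only for illustration.
 */
#include <net/cfg80211.h>

struct my_priv {
	struct wiphy *wiphy;
};

static const struct cfg80211_ops my_ops = {
	/* .scan, .connect, ... would be filled in by a real driver */
};

static struct wiphy *my_alloc_wiphy(void)
{
	/* wiphy_new() stores the ops pointer; cfg80211 then calls these
	 * hooks for every nl80211 request aimed at this phy. */
	struct wiphy *wiphy = wiphy_new(&my_ops, sizeof(struct my_priv));

	if (!wiphy)
		return NULL;

	/* wiphy_priv() returns the driver-private area requested above. */
	((struct my_priv *)wiphy_priv(wiphy))->wiphy = wiphy;
	return wiphy;
}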
15 17 11 11 6 1 1 17 11 20 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 // SPDX-License-Identifier: GPL-2.0 /* * udc.c - Core UDC Framework * * Copyright (C) 2016 Intel Corporation * Author: Felipe Balbi <felipe.balbi@linux.intel.com> */ #undef TRACE_SYSTEM #define TRACE_SYSTEM gadget #if !defined(__UDC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define __UDC_TRACE_H #include <linux/types.h> #include <linux/tracepoint.h> #include <asm/byteorder.h> #include <linux/usb/gadget.h> DECLARE_EVENT_CLASS(udc_log_gadget, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret), TP_STRUCT__entry( __field(enum usb_device_speed, speed) __field(enum usb_device_speed, max_speed) __field(enum usb_device_state, state) __field(unsigned, mA) __field(unsigned, sg_supported) __field(unsigned, is_otg) __field(unsigned, is_a_peripheral) __field(unsigned, b_hnp_enable) __field(unsigned, a_hnp_support) __field(unsigned, hnp_polling_support) __field(unsigned, host_request_flag) __field(unsigned, quirk_ep_out_aligned_size) __field(unsigned, quirk_altset_not_supp) __field(unsigned, quirk_stall_not_supp) __field(unsigned, quirk_zlp_not_supp) __field(unsigned, is_selfpowered) __field(unsigned, deactivated) __field(unsigned, connected) __field(int, ret) ), TP_fast_assign( __entry->speed = g->speed; __entry->max_speed = g->max_speed; __entry->state = g->state; __entry->mA = g->mA; __entry->sg_supported = g->sg_supported; __entry->is_otg = g->is_otg; __entry->is_a_peripheral = g->is_a_peripheral; __entry->b_hnp_enable = g->b_hnp_enable; __entry->a_hnp_support = g->a_hnp_support; __entry->hnp_polling_support = g->hnp_polling_support; __entry->host_request_flag = g->host_request_flag; __entry->quirk_ep_out_aligned_size = g->quirk_ep_out_aligned_size; __entry->quirk_altset_not_supp = g->quirk_altset_not_supp; __entry->quirk_stall_not_supp = g->quirk_stall_not_supp; __entry->quirk_zlp_not_supp = g->quirk_zlp_not_supp; __entry->is_selfpowered = g->is_selfpowered; __entry->deactivated = g->deactivated; __entry->connected = g->connected; __entry->ret = ret; ), TP_printk("speed %d/%d state %d %dmA [%s%s%s%s%s%s%s%s%s%s%s%s%s%s] --> %d", __entry->speed, __entry->max_speed, __entry->state, __entry->mA, __entry->sg_supported ? "sg:" : "", __entry->is_otg ? "OTG:" : "", __entry->is_a_peripheral ? "a_peripheral:" : "", __entry->b_hnp_enable ? "b_hnp:" : "", __entry->a_hnp_support ? "a_hnp:" : "", __entry->hnp_polling_support ? "hnp_poll:" : "", __entry->host_request_flag ? "hostreq:" : "", __entry->quirk_ep_out_aligned_size ? 
"out_aligned:" : "", __entry->quirk_altset_not_supp ? "no_altset:" : "", __entry->quirk_stall_not_supp ? "no_stall:" : "", __entry->quirk_zlp_not_supp ? "no_zlp" : "", __entry->is_selfpowered ? "self-powered:" : "bus-powered:", __entry->deactivated ? "deactivated:" : "activated:", __entry->connected ? "connected" : "disconnected", __entry->ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_frame_number, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_clear_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_vbus_connect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_vbus_draw, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_vbus_disconnect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_connect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_disconnect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_deactivate, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_activate, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DECLARE_EVENT_CLASS(udc_log_ep, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret), TP_STRUCT__entry( __string(name, ep->name) __field(unsigned, maxpacket) __field(unsigned, maxpacket_limit) __field(unsigned, max_streams) __field(unsigned, mult) __field(unsigned, maxburst) __field(u8, address) __field(bool, claimed) __field(bool, enabled) __field(int, ret) ), TP_fast_assign( __assign_str(name, ep->name); __entry->maxpacket = ep->maxpacket; __entry->maxpacket_limit = ep->maxpacket_limit; __entry->max_streams = ep->max_streams; __entry->mult = ep->mult; __entry->maxburst = ep->maxburst; __entry->address = ep->address, __entry->claimed = ep->claimed; __entry->enabled = ep->enabled; __entry->ret = ret; ), TP_printk("%s: mps %d/%d streams %d mult %d burst %d addr %02x %s%s --> %d", __get_str(name), __entry->maxpacket, __entry->maxpacket_limit, __entry->max_streams, __entry->mult, __entry->maxburst, __entry->address, __entry->claimed ? "claimed:" : "released:", __entry->enabled ? 
"enabled" : "disabled", ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_set_maxpacket_limit, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_enable, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_disable, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_set_halt, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_clear_halt, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_set_wedge, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_fifo_status, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_fifo_flush, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DECLARE_EVENT_CLASS(udc_log_req, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret), TP_STRUCT__entry( __string(name, ep->name) __field(unsigned, length) __field(unsigned, actual) __field(unsigned, num_sgs) __field(unsigned, num_mapped_sgs) __field(unsigned, stream_id) __field(unsigned, no_interrupt) __field(unsigned, zero) __field(unsigned, short_not_ok) __field(int, status) __field(int, ret) __field(struct usb_request *, req) ), TP_fast_assign( __assign_str(name, ep->name); __entry->length = req->length; __entry->actual = req->actual; __entry->num_sgs = req->num_sgs; __entry->num_mapped_sgs = req->num_mapped_sgs; __entry->stream_id = req->stream_id; __entry->no_interrupt = req->no_interrupt; __entry->zero = req->zero; __entry->short_not_ok = req->short_not_ok; __entry->status = req->status; __entry->ret = ret; __entry->req = req; ), TP_printk("%s: req %p length %d/%d sgs %d/%d stream %d %s%s%s status %d --> %d", __get_str(name),__entry->req, __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream_id, __entry->zero ? "Z" : "z", __entry->short_not_ok ? "S" : "s", __entry->no_interrupt ? "i" : "I", __entry->status, __entry->ret ) ); DEFINE_EVENT(udc_log_req, usb_ep_alloc_request, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_ep_free_request, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_ep_queue, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_ep_dequeue, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_gadget_giveback_request, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); #endif /* __UDC_TRACE_H */ /* this part has to be here */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
17 17 16 16 16 6 17 12 7 13 6 7 7 18 6 14 8 8 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 // SPDX-License-Identifier: GPL-2.0 /* * mm/fadvise.c * * Copyright (C) 2002, Linus Torvalds * * 11Jan2003 Andrew Morton * Initial version. */ #include <linux/kernel.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/backing-dev.h> #include <linux/fadvise.h> #include <linux/writeback.h> #include <linux/syscalls.h> #include <linux/swap.h> #include <asm/unistd.h> #include "internal.h" /* * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could * deactivate the pages and clear PG_Referenced. */ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) { struct inode *inode; struct address_space *mapping; struct backing_dev_info *bdi; loff_t endbyte; /* inclusive */ pgoff_t start_index; pgoff_t end_index; unsigned long nrpages; inode = file_inode(file); if (S_ISFIFO(inode->i_mode)) return -ESPIPE; mapping = file->f_mapping; if (!mapping || len < 0) return -EINVAL; bdi = inode_to_bdi(mapping->host); if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) { switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_RANDOM: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_WILLNEED: case POSIX_FADV_NOREUSE: case POSIX_FADV_DONTNEED: /* no bad return value, but ignore advice */ break; default: return -EINVAL; } return 0; } /* * Careful about overflows. Len == 0 means "as much as possible". Use * unsigned math because signed overflows are undefined and UBSan * complains. */ endbyte = (u64)offset + (u64)len; if (!len || endbyte < len) endbyte = LLONG_MAX; else endbyte--; /* inclusive */ switch (advice) { case POSIX_FADV_NORMAL: file->f_ra.ra_pages = bdi->ra_pages; spin_lock(&file->f_lock); file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE); spin_unlock(&file->f_lock); break; case POSIX_FADV_RANDOM: spin_lock(&file->f_lock); file->f_mode |= FMODE_RANDOM; spin_unlock(&file->f_lock); break; case POSIX_FADV_SEQUENTIAL: file->f_ra.ra_pages = bdi->ra_pages * 2; spin_lock(&file->f_lock); file->f_mode &= ~FMODE_RANDOM; spin_unlock(&file->f_lock); break; case POSIX_FADV_WILLNEED: /* First and last PARTIAL page! */ start_index = offset >> PAGE_SHIFT; end_index = endbyte >> PAGE_SHIFT; /* Careful about overflow on the "+1" */ nrpages = end_index - start_index + 1; if (!nrpages) nrpages = ~0UL; force_page_cache_readahead(mapping, file, start_index, nrpages); break; case POSIX_FADV_NOREUSE: spin_lock(&file->f_lock); file->f_mode |= FMODE_NOREUSE; spin_unlock(&file->f_lock); break; case POSIX_FADV_DONTNEED: __filemap_fdatawrite_range(mapping, offset, endbyte, WB_SYNC_NONE); /* * First and last FULL page! 
Partial pages are deliberately * preserved on the expectation that it is better to preserve * needed memory than to discard unneeded memory. */ start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; end_index = (endbyte >> PAGE_SHIFT); /* * The page at end_index will be inclusively discarded according * by invalidate_mapping_pages(), so subtracting 1 from * end_index means we will skip the last page. But if endbyte * is page aligned or is at the end of file, we should not skip * that page - discarding the last page is safe enough. */ if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK && endbyte != inode->i_size - 1) { /* First page is tricky as 0 - 1 = -1, but pgoff_t * is unsigned, so the end_index >= start_index * check below would be true and we'll discard the whole * file cache which is not what was asked. */ if (end_index == 0) break; end_index--; } if (end_index >= start_index) { unsigned long nr_failed = 0; /* * It's common to FADV_DONTNEED right after * the read or write that instantiates the * pages, in which case there will be some * sitting on the local LRU cache. Try to * avoid the expensive remote drain and the * second cache tree walk below by flushing * them out right away. */ lru_add_drain(); mapping_try_invalidate(mapping, start_index, end_index, &nr_failed); /* * The failures may be due to the folio being * in the LRU cache of a remote CPU. Drain all * caches and try again. */ if (nr_failed) { lru_add_drain_all(); invalidate_mapping_pages(mapping, start_index, end_index); } } break; default: return -EINVAL; } return 0; } EXPORT_SYMBOL(generic_fadvise); int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) { if (file->f_op->fadvise) return file->f_op->fadvise(file, offset, len, advice); return generic_fadvise(file, offset, len, advice); } EXPORT_SYMBOL(vfs_fadvise); #ifdef CONFIG_ADVISE_SYSCALLS int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) { struct fd f = fdget(fd); int ret; if (!f.file) return -EBADF; ret = vfs_fadvise(f.file, offset, len, advice); fdput(f); return ret; } SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) { return ksys_fadvise64_64(fd, offset, len, advice); } #ifdef __ARCH_WANT_SYS_FADVISE64 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) { return ksys_fadvise64_64(fd, offset, len, advice); } #endif #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64) COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset), compat_arg_u64_dual(len), int, advice) { return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset), compat_arg_u64_glue(len), advice); } #endif #endif
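/*
 * Illustrative userspace sketch (not part of mm/fadvise.c): exercising the
 * advice values handled above through posix_fadvise(2).  As the DONTNEED
 * path documents, partial pages are preserved, so callers who want the
 * cache dropped pass page-aligned offset/len (0/0 covers the whole file).
 * The file name is a placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/example.dat", O_RDONLY);
	int err;

	if (fd < 0)
		return 1;

	/* Hint sequential access: generic_fadvise() doubles the readahead
	 * window for POSIX_FADV_SEQUENTIAL. */
	err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);

	/* ... read the file ... */

	/* Ask the kernel to drop the cached pages we no longer need.
	 * posix_fadvise() returns 0 or a positive errno value. */
	if (!err)
		err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "fadvise: %s\n", strerror(err));

	close(fd);
	return err ? 1 : 0;
}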
111 110 105 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 // SPDX-License-Identifier: GPL-2.0-only /* * scsi_pm.c Copyright (C) 2010 Alan Stern * * SCSI dynamic Power Management * Initial version: Alan Stern <stern@rowland.harvard.edu> */ #include <linux/pm_runtime.h> #include <linux/export.h> #include <linux/blk-pm.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_host.h> #include "scsi_priv.h" #ifdef CONFIG_PM_SLEEP static int do_scsi_suspend(struct device *dev, const struct dev_pm_ops *pm) { return pm && pm->suspend ? pm->suspend(dev) : 0; } static int do_scsi_freeze(struct device *dev, const struct dev_pm_ops *pm) { return pm && pm->freeze ? pm->freeze(dev) : 0; } static int do_scsi_poweroff(struct device *dev, const struct dev_pm_ops *pm) { return pm && pm->poweroff ? pm->poweroff(dev) : 0; } static int do_scsi_resume(struct device *dev, const struct dev_pm_ops *pm) { return pm && pm->resume ? pm->resume(dev) : 0; } static int do_scsi_thaw(struct device *dev, const struct dev_pm_ops *pm) { return pm && pm->thaw ? pm->thaw(dev) : 0; } static int do_scsi_restore(struct device *dev, const struct dev_pm_ops *pm) { return pm && pm->restore ? pm->restore(dev) : 0; } static int scsi_dev_type_suspend(struct device *dev, int (*cb)(struct device *, const struct dev_pm_ops *)) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int err; err = scsi_device_quiesce(to_scsi_device(dev)); if (err == 0) { err = cb(dev, pm); if (err) scsi_device_resume(to_scsi_device(dev)); } dev_dbg(dev, "scsi suspend: %d\n", err); return err; } static int scsi_bus_suspend_common(struct device *dev, int (*cb)(struct device *, const struct dev_pm_ops *)) { if (!scsi_is_sdev_device(dev)) return 0; return scsi_dev_type_suspend(dev, cb); } static int scsi_bus_resume_common(struct device *dev, int (*cb)(struct device *, const struct dev_pm_ops *)) { const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int err; if (!scsi_is_sdev_device(dev)) return 0; err = cb(dev, pm); scsi_device_resume(to_scsi_device(dev)); dev_dbg(dev, "scsi resume: %d\n", err); return err; } static int scsi_bus_prepare(struct device *dev) { if (scsi_is_host_device(dev)) { /* Wait until async scanning is finished */ scsi_complete_async_scans(); } return 0; } static int scsi_bus_suspend(struct device *dev) { return scsi_bus_suspend_common(dev, do_scsi_suspend); } static int scsi_bus_resume(struct device *dev) { return scsi_bus_resume_common(dev, do_scsi_resume); } static int scsi_bus_freeze(struct device *dev) { return scsi_bus_suspend_common(dev, do_scsi_freeze); } static int scsi_bus_thaw(struct device *dev) { return scsi_bus_resume_common(dev, do_scsi_thaw); } static int scsi_bus_poweroff(struct device *dev) { return scsi_bus_suspend_common(dev, do_scsi_poweroff); } static int scsi_bus_restore(struct device *dev) { return scsi_bus_resume_common(dev, do_scsi_restore); } #else /* CONFIG_PM_SLEEP */ #define scsi_bus_prepare NULL #define scsi_bus_suspend NULL #define scsi_bus_resume NULL #define scsi_bus_freeze NULL #define scsi_bus_thaw NULL #define scsi_bus_poweroff NULL #define scsi_bus_restore NULL #endif /* CONFIG_PM_SLEEP */ static int sdev_runtime_suspend(struct device *dev) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; struct scsi_device *sdev = to_scsi_device(dev); int err = 0; err = blk_pre_runtime_suspend(sdev->request_queue); if (err) return err; if (pm && pm->runtime_suspend) err = pm->runtime_suspend(dev); blk_post_runtime_suspend(sdev->request_queue, err); return err; } static int scsi_runtime_suspend(struct device *dev) { int err = 0; dev_dbg(dev, "scsi_runtime_suspend\n"); if (scsi_is_sdev_device(dev)) err = sdev_runtime_suspend(dev); /* Insert hooks here for targets, hosts, and transport classes */ return err; } static int sdev_runtime_resume(struct device *dev) { struct scsi_device *sdev = to_scsi_device(dev); const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int err = 0; blk_pre_runtime_resume(sdev->request_queue); if (pm && pm->runtime_resume) err = pm->runtime_resume(dev); blk_post_runtime_resume(sdev->request_queue); return err; } static int scsi_runtime_resume(struct device *dev) { int err = 0; dev_dbg(dev, "scsi_runtime_resume\n"); if (scsi_is_sdev_device(dev)) err = sdev_runtime_resume(dev); /* Insert hooks here for targets, hosts, and transport classes */ return err; } static int scsi_runtime_idle(struct device *dev) { dev_dbg(dev, "scsi_runtime_idle\n"); /* Insert hooks here for targets, hosts, and transport classes */ if (scsi_is_sdev_device(dev)) { pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return -EBUSY; } return 0; } int scsi_autopm_get_device(struct scsi_device *sdev) { int err; err = pm_runtime_get_sync(&sdev->sdev_gendev); if (err < 0 && err !=-EACCES) pm_runtime_put_sync(&sdev->sdev_gendev); else err = 0; return err; } EXPORT_SYMBOL_GPL(scsi_autopm_get_device); void scsi_autopm_put_device(struct scsi_device *sdev) { pm_runtime_put_sync(&sdev->sdev_gendev); } EXPORT_SYMBOL_GPL(scsi_autopm_put_device); void scsi_autopm_get_target(struct scsi_target *starget) { pm_runtime_get_sync(&starget->dev); } void scsi_autopm_put_target(struct scsi_target *starget) { pm_runtime_put_sync(&starget->dev); } int scsi_autopm_get_host(struct Scsi_Host *shost) { int err; err = pm_runtime_get_sync(&shost->shost_gendev); if (err < 0 && err !=-EACCES) pm_runtime_put_sync(&shost->shost_gendev); else err = 0; return err; } void scsi_autopm_put_host(struct Scsi_Host *shost) { pm_runtime_put_sync(&shost->shost_gendev); } const struct dev_pm_ops scsi_bus_pm_ops = { .prepare = scsi_bus_prepare, .suspend = scsi_bus_suspend, .resume = scsi_bus_resume, .freeze = scsi_bus_freeze, .thaw = scsi_bus_thaw, .poweroff = scsi_bus_poweroff, .restore = scsi_bus_restore, .runtime_suspend = scsi_runtime_suspend, .runtime_resume = scsi_runtime_resume, .runtime_idle = scsi_runtime_idle, };
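/*
 * Illustrative sketch (not part of scsi_pm.c): the usual calling pattern
 * for the runtime-PM helpers exported above.  An upper-layer driver takes
 * a runtime-PM reference for the duration of the access so the device is
 * resumed before I/O and allowed to autosuspend afterwards.  The
 * example_access() function and its do_io() callback are hypothetical.
 */
#include <scsi/scsi_device.h>

static int example_access(struct scsi_device *sdev,
			  int (*do_io)(struct scsi_device *))
{
	int ret;

	/* Resumes the device if needed; returns 0 on success. */
	ret = scsi_autopm_get_device(sdev);
	if (ret)
		return ret;

	ret = do_io(sdev);

	/* Drop the reference; the device may autosuspend again. */
	scsi_autopm_put_device(sdev);
	return ret;
}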
3 4 5 1 5 3 3 1 1 1 1 1 1 1 1 5 5 5 2 5 6 6 6 3 3 6 5 6 3 6 5 6 5 6 6 6 6 6 6 6 6 6 6 8 7 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 // SPDX-License-Identifier: GPL-2.0 /* XDP sockets monitoring support * * Copyright(c) 2019 Intel Corporation. * * Author: Björn Töpel <bjorn.topel@intel.com> */ #include <linux/module.h> #include <net/xdp_sock.h> #include <linux/xdp_diag.h> #include <linux/sock_diag.h> #include "xsk_queue.h" #include "xsk.h" static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) { struct xdp_diag_info di = {}; di.ifindex = xs->dev ? xs->dev->ifindex : 0; di.queue_id = xs->queue_id; return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); } static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, struct sk_buff *nlskb) { struct xdp_diag_ring dr = {}; dr.entries = queue->nentries; return nla_put(nlskb, nl_type, sizeof(dr), &dr); } static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, struct sk_buff *nlskb) { int err = 0; if (xs->rx) err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); if (!err && xs->tx) err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); return err; } static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) { struct xsk_buff_pool *pool = xs->pool; struct xdp_umem *umem = xs->umem; struct xdp_diag_umem du = {}; int err; if (!umem) return 0; du.id = umem->id; du.size = umem->size; du.num_pages = umem->npgs; du.chunk_size = umem->chunk_size; du.headroom = umem->headroom; du.ifindex = (pool && pool->netdev) ? pool->netdev->ifindex : 0; du.queue_id = pool ? pool->queue_id : 0; du.flags = 0; if (umem->zc) du.flags |= XDP_DU_F_ZEROCOPY; du.refs = refcount_read(&umem->users); err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); if (!err && pool && pool->fq) err = xsk_diag_put_ring(pool->fq, XDP_DIAG_UMEM_FILL_RING, nlskb); if (!err && pool && pool->cq) err = xsk_diag_put_ring(pool->cq, XDP_DIAG_UMEM_COMPLETION_RING, nlskb); return err; } static int xsk_diag_put_stats(const struct xdp_sock *xs, struct sk_buff *nlskb) { struct xdp_diag_stats du = {}; du.n_rx_dropped = xs->rx_dropped; du.n_rx_invalid = xskq_nb_invalid_descs(xs->rx); du.n_rx_full = xs->rx_queue_full; du.n_fill_ring_empty = xs->pool ? 
xskq_nb_queue_empty_descs(xs->pool->fq) : 0; du.n_tx_invalid = xskq_nb_invalid_descs(xs->tx); du.n_tx_ring_empty = xskq_nb_queue_empty_descs(xs->tx); return nla_put(nlskb, XDP_DIAG_STATS, sizeof(du), &du); } static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, struct xdp_diag_req *req, struct user_namespace *user_ns, u32 portid, u32 seq, u32 flags, int sk_ino) { struct xdp_sock *xs = xdp_sk(sk); struct xdp_diag_msg *msg; struct nlmsghdr *nlh; nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), flags); if (!nlh) return -EMSGSIZE; msg = nlmsg_data(nlh); memset(msg, 0, sizeof(*msg)); msg->xdiag_family = AF_XDP; msg->xdiag_type = sk->sk_type; msg->xdiag_ino = sk_ino; sock_diag_save_cookie(sk, msg->xdiag_cookie); mutex_lock(&xs->mutex); if (READ_ONCE(xs->state) == XSK_UNBOUND) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_INFO) && nla_put_u32(nlskb, XDP_DIAG_UID, from_kuid_munged(user_ns, sock_i_uid(sk)))) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_RING_CFG) && xsk_diag_put_rings_cfg(xs, nlskb)) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_UMEM) && xsk_diag_put_umem(xs, nlskb)) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_MEMINFO) && sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_STATS) && xsk_diag_put_stats(xs, nlskb)) goto out_nlmsg_trim; mutex_unlock(&xs->mutex); nlmsg_end(nlskb, nlh); return 0; out_nlmsg_trim: mutex_unlock(&xs->mutex); nlmsg_cancel(nlskb, nlh); return -EMSGSIZE; } static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb) { struct xdp_diag_req *req = nlmsg_data(cb->nlh); struct net *net = sock_net(nlskb->sk); int num = 0, s_num = cb->args[0]; struct sock *sk; mutex_lock(&net->xdp.lock); sk_for_each(sk, &net->xdp.list) { if (!net_eq(sock_net(sk), net)) continue; if (num++ < s_num) continue; if (xsk_diag_fill(sk, nlskb, req, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, sock_i_ino(sk)) < 0) { num--; break; } } mutex_unlock(&net->xdp.lock); cb->args[0] = num; return nlskb->len; } static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) { struct netlink_dump_control c = { .dump = xsk_diag_dump }; int hdrlen = sizeof(struct xdp_diag_req); struct net *net = sock_net(nlskb->sk); if (nlmsg_len(hdr) < hdrlen) return -EINVAL; if (!(hdr->nlmsg_flags & NLM_F_DUMP)) return -EOPNOTSUPP; return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); } static const struct sock_diag_handler xsk_diag_handler = { .owner = THIS_MODULE, .family = AF_XDP, .dump = xsk_diag_handler_dump, }; static int __init xsk_diag_init(void) { return sock_diag_register(&xsk_diag_handler); } static void __exit xsk_diag_exit(void) { sock_diag_unregister(&xsk_diag_handler); } module_init(xsk_diag_init); module_exit(xsk_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
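/*
 * Illustrative userspace sketch (not part of xsk_diag.c): issuing the
 * SOCK_DIAG_BY_FAMILY dump request that the handler above serves.  Reply
 * parsing (an NLM_F_MULTI stream of xdp_diag_msg plus attributes) is
 * omitted for brevity.  Structure layouts and XDP_SHOW_* flags come from
 * <linux/xdp_diag.h>; AF_XDP assumes reasonably recent kernel headers.
 */
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/xdp_diag.h>
#include <sys/socket.h>
#include <unistd.h>

static int xsk_diag_dump_request(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct xdp_diag_req req;
	} msg = {
		.nlh = {
			.nlmsg_len = sizeof(msg),
			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = AF_XDP,
			/* Ask for the attribute groups filled in above. */
			.xdiag_show = XDP_SHOW_INFO | XDP_SHOW_RING_CFG |
				      XDP_SHOW_UMEM | XDP_SHOW_STATS,
		},
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	int ret;

	if (fd < 0)
		return -1;
	ret = sendto(fd, &msg, sizeof(msg), 0,
		     (struct sockaddr *)&kernel, sizeof(kernel));
	/* A recv() loop with NLMSG_OK()/NLMSG_NEXT() would follow here. */
	close(fd);
	return ret < 0 ? -1 : 0;
}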
528 511 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* V4L2 device support header. Copyright (C) 2008 Hans Verkuil <hverkuil@xs4all.nl> */ #ifndef _V4L2_DEVICE_H #define _V4L2_DEVICE_H #include <media/media-device.h> #include <media/v4l2-subdev.h> #include <media/v4l2-dev.h> struct v4l2_ctrl_handler; /** * struct v4l2_device - main struct to for V4L2 device drivers * * @dev: pointer to struct device. * @mdev: pointer to struct media_device, may be NULL. * @subdevs: used to keep track of the registered subdevs * @lock: lock this struct; can be used by the driver as well * if this struct is embedded into a larger struct. * @name: unique device name, by default the driver name + bus ID * @notify: notify operation called by some sub-devices. * @ctrl_handler: The control handler. May be %NULL. * @prio: Device's priority state * @ref: Keep track of the references to this struct. * @release: Release function that is called when the ref count * goes to 0. * * Each instance of a V4L2 device should create the v4l2_device struct, * either stand-alone or embedded in a larger struct. * * It allows easy access to sub-devices (see v4l2-subdev.h) and provides * basic V4L2 device-level support. * * .. note:: * * #) @dev->driver_data points to this struct. 
* #) @dev might be %NULL if there is no parent device */ struct v4l2_device { struct device *dev; struct media_device *mdev; struct list_head subdevs; spinlock_t lock; char name[36]; void (*notify)(struct v4l2_subdev *sd, unsigned int notification, void *arg); struct v4l2_ctrl_handler *ctrl_handler; struct v4l2_prio_state prio; struct kref ref; void (*release)(struct v4l2_device *v4l2_dev); }; /** * v4l2_device_get - gets a V4L2 device reference * * @v4l2_dev: pointer to struct &v4l2_device * * This is an ancillary routine meant to increment the usage for the * struct &v4l2_device pointed by @v4l2_dev. */ static inline void v4l2_device_get(struct v4l2_device *v4l2_dev) { kref_get(&v4l2_dev->ref); } /** * v4l2_device_put - puts a V4L2 device reference * * @v4l2_dev: pointer to struct &v4l2_device * * This is an ancillary routine meant to decrement the usage for the * struct &v4l2_device pointed by @v4l2_dev. */ int v4l2_device_put(struct v4l2_device *v4l2_dev); /** * v4l2_device_register - Initialize v4l2_dev and make @dev->driver_data * point to @v4l2_dev. * * @dev: pointer to struct &device * @v4l2_dev: pointer to struct &v4l2_device * * .. note:: * @dev may be %NULL in rare cases (ISA devices). * In such case the caller must fill in the @v4l2_dev->name field * before calling this function. */ int __must_check v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); /** * v4l2_device_set_name - Optional function to initialize the * name field of struct &v4l2_device * * @v4l2_dev: pointer to struct &v4l2_device * @basename: base name for the device name * @instance: pointer to a static atomic_t var with the instance usage for * the device driver. * * v4l2_device_set_name() initializes the name field of struct &v4l2_device * using the driver name and a driver-global atomic_t instance. * * This function will increment the instance counter and returns the * instance value used in the name. * * Example: * * static atomic_t drv_instance = ATOMIC_INIT(0); * * ... * * instance = v4l2_device_set_name(&\ v4l2_dev, "foo", &\ drv_instance); * * The first time this is called the name field will be set to foo0 and * this function returns 0. If the name ends with a digit (e.g. cx18), * then the name will be set to cx18-0 since cx180 would look really odd. */ int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename, atomic_t *instance); /** * v4l2_device_disconnect - Change V4L2 device state to disconnected. * * @v4l2_dev: pointer to struct v4l2_device * * Should be called when the USB parent disconnects. * Since the parent disappears, this ensures that @v4l2_dev doesn't have * an invalid parent pointer. * * .. note:: This function sets @v4l2_dev->dev to NULL. */ void v4l2_device_disconnect(struct v4l2_device *v4l2_dev); /** * v4l2_device_unregister - Unregister all sub-devices and any other * resources related to @v4l2_dev. * * @v4l2_dev: pointer to struct v4l2_device */ void v4l2_device_unregister(struct v4l2_device *v4l2_dev); /** * v4l2_device_register_subdev - Registers a subdev with a v4l2 device. * * @v4l2_dev: pointer to struct &v4l2_device * @sd: pointer to &struct v4l2_subdev * * While registered, the subdev module is marked as in-use. * * An error is returned if the module is no longer loaded on any attempts * to register it. */ int __must_check v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, struct v4l2_subdev *sd); /** * v4l2_device_unregister_subdev - Unregisters a subdev with a v4l2 device. * * @sd: pointer to &struct v4l2_subdev * * .. 
note :: * * Can also be called if the subdev wasn't registered. In such * case, it will do nothing. */ void v4l2_device_unregister_subdev(struct v4l2_subdev *sd); /** * __v4l2_device_register_subdev_nodes - Registers device nodes for * all subdevs of the v4l2 device that are marked with the * %V4L2_SUBDEV_FL_HAS_DEVNODE flag. * * @v4l2_dev: pointer to struct v4l2_device * @read_only: subdevices read-only flag. True to register the subdevices * device nodes in read-only mode, false to allow full access to the * subdevice userspace API. */ int __must_check __v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev, bool read_only); /** * v4l2_device_register_subdev_nodes - Registers subdevices device nodes with * unrestricted access to the subdevice userspace operations * * Internally calls __v4l2_device_register_subdev_nodes(). See its documentation * for more details. * * @v4l2_dev: pointer to struct v4l2_device */ static inline int __must_check v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev) { #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API) return __v4l2_device_register_subdev_nodes(v4l2_dev, false); #else return 0; #endif } /** * v4l2_device_register_ro_subdev_nodes - Registers subdevices device nodes * in read-only mode * * Internally calls __v4l2_device_register_subdev_nodes(). See its documentation * for more details. * * @v4l2_dev: pointer to struct v4l2_device */ static inline int __must_check v4l2_device_register_ro_subdev_nodes(struct v4l2_device *v4l2_dev) { #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API) return __v4l2_device_register_subdev_nodes(v4l2_dev, true); #else return 0; #endif } /** * v4l2_subdev_notify - Sends a notification to v4l2_device. * * @sd: pointer to &struct v4l2_subdev * @notification: type of notification. Please notice that the notification * type is driver-specific. * @arg: arguments for the notification. Those are specific to each * notification type. */ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, unsigned int notification, void *arg) { if (sd && sd->v4l2_dev && sd->v4l2_dev->notify) sd->v4l2_dev->notify(sd, notification, arg); } /** * v4l2_device_supports_requests - Test if requests are supported. * * @v4l2_dev: pointer to struct v4l2_device */ static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev) { return v4l2_dev->mdev && v4l2_dev->mdev->ops && v4l2_dev->mdev->ops->req_queue; } /* Helper macros to iterate over all subdevs. */ /** * v4l2_device_for_each_subdev - Helper macro that interates over all * sub-devices of a given &v4l2_device. * * @sd: pointer that will be filled by the macro with all * &struct v4l2_subdev pointer used as an iterator by the loop. * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * * This macro iterates over all sub-devices owned by the @v4l2_dev device. * It acts as a for loop iterator and executes the next statement with * the @sd variable pointing to each sub-device in turn. */ #define v4l2_device_for_each_subdev(sd, v4l2_dev) \ list_for_each_entry(sd, &(v4l2_dev)->subdevs, list) /** * __v4l2_device_call_subdevs_p - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @sd: pointer that will be filled by the macro with all * &struct v4l2_subdev pointer used as an iterator by the loop. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. 
* @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs_p(v4l2_dev, sd, cond, o, f, args...) \ do { \ list_for_each_entry((sd), &(v4l2_dev)->subdevs, list) \ if ((cond) && (sd)->ops->o && (sd)->ops->o->f) \ (sd)->ops->o->f((sd) , ##args); \ } while (0) /** * __v4l2_device_call_subdevs - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs(v4l2_dev, cond, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, cond, o, \ f , ##args); \ } while (0) /** * __v4l2_device_call_subdevs_until_err_p - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @sd: pointer that will be filled by the macro with all * &struct v4l2_subdev sub-devices associated with @v4l2_dev. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, zero * otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs_until_err_p(v4l2_dev, sd, cond, o, f, args...) \ ({ \ long __err = 0; \ \ list_for_each_entry((sd), &(v4l2_dev)->subdevs, list) { \ if ((cond) && (sd)->ops->o && (sd)->ops->o->f) \ __err = (sd)->ops->o->f((sd) , ##args); \ if (__err && __err != -ENOIOCTLCMD) \ break; \ } \ (__err == -ENOIOCTLCMD) ? 0 : __err; \ }) /** * __v4l2_device_call_subdevs_until_err - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs_until_err(v4l2_dev, cond, o, f, args...) 
\ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, cond, o, \ f , ##args); \ }) /** * v4l2_device_call_all - Calls the specified operation for * all subdevs matching the &v4l2_subdev.grp_id, as assigned * by the bridge driver. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_call_all(v4l2_dev, grpid, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) /** * v4l2_device_call_until_err - Calls the specified operation for * all subdevs matching the &v4l2_subdev.grp_id, as assigned * by the bridge driver, until an error occurs. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_call_until_err(v4l2_dev, grpid, o, f, args...) \ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ }) /** * v4l2_device_mask_call_all - Calls the specified operation for * all subdevices where a group ID matches a specified bitmask. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_mask_call_all(v4l2_dev, grpmsk, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ f , ##args); \ } while (0) /** * v4l2_device_mask_call_until_err - Calls the specified operation for * all subdevices where a group ID matches a specified bitmask. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. 
* @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_mask_call_until_err(v4l2_dev, grpmsk, o, f, args...) \ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ f , ##args); \ }) /** * v4l2_device_has_op - checks if any subdev with matching grpid has a * given ops. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. */ #define v4l2_device_has_op(v4l2_dev, grpid, o, f) \ ({ \ struct v4l2_subdev *__sd; \ bool __result = false; \ list_for_each_entry(__sd, &(v4l2_dev)->subdevs, list) { \ if ((grpid) && __sd->grp_id != (grpid)) \ continue; \ if (v4l2_subdev_has_op(__sd, o, f)) { \ __result = true; \ break; \ } \ } \ __result; \ }) /** * v4l2_device_mask_has_op - checks if any subdev with matching group * mask has a given ops. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. */ #define v4l2_device_mask_has_op(v4l2_dev, grpmsk, o, f) \ ({ \ struct v4l2_subdev *__sd; \ bool __result = false; \ list_for_each_entry(__sd, &(v4l2_dev)->subdevs, list) { \ if ((grpmsk) && !(__sd->grp_id & (grpmsk))) \ continue; \ if (v4l2_subdev_has_op(__sd, o, f)) { \ __result = true; \ break; \ } \ } \ __result; \ }) #endif
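/*
 * Usage sketch (illustration only, not part of the header above): how a
 * bridge driver might combine the node-registration helper with the
 * group-call macros. The function name demo_bridge_start_streaming and the
 * error-handling policy are assumptions made for this example; the helpers
 * and the video/s_stream sub-device operation are the ones documented above.
 */
#if 0	/* example only, never compiled */
static int demo_bridge_start_streaming(struct v4l2_device *v4l2_dev)
{
	int ret;

	/*
	 * Create /dev/v4l-subdev* nodes for all sub-devices flagged with
	 * V4L2_SUBDEV_FL_HAS_DEVNODE, read-only here so userspace cannot
	 * change the configuration behind the bridge driver's back.
	 */
	ret = v4l2_device_register_ro_subdev_nodes(v4l2_dev);
	if (ret)
		return ret;

	/*
	 * Start streaming on every sub-device: group ID 0 matches all of
	 * them, and any error other than -ENOIOCTLCMD aborts the loop.
	 */
	ret = v4l2_device_call_until_err(v4l2_dev, 0, video, s_stream, 1);
	if (ret)
		/* Best-effort stop on the sub-devices that did start. */
		v4l2_device_call_all(v4l2_dev, 0, video, s_stream, 0);

	return ret;
}
#endif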
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1994 Linus Torvalds * * Pentium III FXSR, SSE support * General FPU state handling cleanups * Gareth Hughes <gareth@valinux.com>, May 2000 */ #include <asm/fpu/api.h> #include <asm/fpu/regset.h> #include <asm/fpu/sched.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> #include <asm/traps.h> #include <asm/irq_regs.h> #include <uapi/asm/kvm.h> #include <linux/hardirq.h> #include <linux/pkeys.h> #include <linux/vmalloc.h> #include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" #define CREATE_TRACE_POINTS #include <asm/trace/fpu.h> #ifdef CONFIG_X86_64 DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); DEFINE_PER_CPU(u64, xfd_state); #endif /* The FPU state configuration data for kernel and user space */ struct fpu_state_config fpu_kernel_cfg __ro_after_init; struct fpu_state_config fpu_user_cfg __ro_after_init; /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ struct fpstate init_fpstate __ro_after_init; /* Track in-kernel FPU usage */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* * Track which context is using the FPU on the CPU: */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? */ bool irq_fpu_usable(void) { if (WARN_ON_ONCE(in_nmi())) return false; /* In kernel FPU usage already active? */ if (this_cpu_read(in_kernel_fpu)) return false; /* * When not in NMI or hard interrupt context, FPU can be used in: * * - Task context except from within fpregs_lock()'ed critical * regions. * * - Soft interrupt processing context which cannot happen * while in a fpregs_lock()'ed critical region. */ if (!in_hardirq()) return true; /* * In hard interrupt context it's safe when soft interrupts * are enabled, which means the interrupt did not hit in * a fpregs_lock()'ed critical region. */ return !softirq_count(); } EXPORT_SYMBOL(irq_fpu_usable); /* * Track AVX512 state use because it is known to slow the max clock * speed of the core. */ static void update_avx_timestamp(struct fpu *fpu) { #define AVX512_TRACKING_MASK (XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) if (fpu->fpstate->regs.xsave.header.xfeatures & AVX512_TRACKING_MASK) fpu->avx512_timestamp = jiffies; } /* * Save the FPU register state in fpu->fpstate->regs. The register state is * preserved. * * Must be called with fpregs_lock() held. * * The legacy FNSAVE instruction clears all FPU state unconditionally, so * register state has to be reloaded. That might be a pointless exercise * when the FPU is going to be used by another task right after that. But * this only affects 20+ years old 32bit systems and avoids conditionals all * over the place. * * FXSAVE and all XSAVE variants preserve the FPU register state. */ void save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { os_xsave(fpu->fpstate); update_avx_timestamp(fpu); return; } if (likely(use_fxsr())) { fxsave(&fpu->fpstate->regs.fxsave); return; } /* * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to reload them from the memory state.
*/ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave)); frstor(&fpu->fpstate->regs.fsave); } void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) { /* * AMD K7/K8 and later CPUs up to Zen don't save/restore * FDP/FIP/FOP unless an exception is pending. Clear the x87 state * here by setting it to fixed values. "m" is a random variable * that should be in L1. */ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" "emms\n\t" "fildl %P[addr]" /* set F?P to defined value */ : : [addr] "m" (fpstate)); } if (use_xsave()) { /* * Dynamically enabled features are enabled in XCR0, but * usage requires also that the corresponding bits in XFD * are cleared. If the bits are set then using a related * instruction will raise #NM. This allows to do the * allocation of the larger FPU buffer lazy from #NM or if * the task has no permission to kill it which would happen * via #UD if the feature is disabled in XCR0. * * XFD state is following the same life time rules as * XSTATE and to restore state correctly XFD has to be * updated before XRSTORS otherwise the component would * stay in or go into init state even if the bits are set * in fpstate::regs::xsave::xfeatures. */ xfd_update_state(fpstate); /* * Restoring state always needs to modify all features * which are in @mask even if the current task cannot use * extended features. * * So fpstate->xfeatures cannot be used here, because then * a feature for which the task has no permission but was * used by the previous task would not go into init state. */ mask = fpu_kernel_cfg.max_features & mask; os_xrstor(fpstate, mask); } else { if (use_fxsr()) fxrstor(&fpstate->regs.fxsave); else frstor(&fpstate->regs.fsave); } } void fpu_reset_from_exception_fixup(void) { restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE); } #if IS_ENABLED(CONFIG_KVM) static void __fpstate_reset(struct fpstate *fpstate, u64 xfd); static void fpu_init_guest_permissions(struct fpu_guest *gfpu) { struct fpu_state_perm *fpuperm; u64 perm; if (!IS_ENABLED(CONFIG_X86_64)) return; spin_lock_irq(&current->sighand->siglock); fpuperm = &current->group_leader->thread.fpu.guest_perm; perm = fpuperm->__state_perm; /* First fpstate allocation locks down permissions. */ WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED); spin_unlock_irq(&current->sighand->siglock); gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED; } bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) { struct fpstate *fpstate; unsigned int size; size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64); fpstate = vzalloc(size); if (!fpstate) return false; /* Leave xfd to 0 (the reset value defined by spec) */ __fpstate_reset(fpstate, 0); fpstate_init_user(fpstate); fpstate->is_valloc = true; fpstate->is_guest = true; gfpu->fpstate = fpstate; gfpu->xfeatures = fpu_user_cfg.default_features; gfpu->perm = fpu_user_cfg.default_features; /* * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state * to userspace, even when XSAVE is unsupported, so that restoring FPU * state on a different CPU that does support XSAVE can cleanly load * the incoming state using its natural XSAVE. In other words, KVM's * uABI size may be larger than this host's default size. Conversely, * the default size should never be larger than KVM's base uABI size; * all features that can expand the uABI size must be opt-in. 
*/ gfpu->uabi_size = sizeof(struct kvm_xsave); if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size)) gfpu->uabi_size = fpu_user_cfg.default_size; fpu_init_guest_permissions(gfpu); return true; } EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); void fpu_free_guest_fpstate(struct fpu_guest *gfpu) { struct fpstate *fps = gfpu->fpstate; if (!fps) return; if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use)) return; gfpu->fpstate = NULL; vfree(fps); } EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); /* * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable * @guest_fpu: Pointer to the guest FPU container * @xfeatures: Features requested by guest CPUID * * Enable all dynamic xfeatures according to guest perm and requested CPUID. * * Return: 0 on success, error code otherwise */ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures) { lockdep_assert_preemption_enabled(); /* Nothing to do if all requested features are already enabled. */ xfeatures &= ~guest_fpu->xfeatures; if (!xfeatures) return 0; return __xfd_enable_feature(xfeatures, guest_fpu); } EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); #ifdef CONFIG_X86_64 void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { fpregs_lock(); guest_fpu->fpstate->xfd = xfd; if (guest_fpu->fpstate->in_use) xfd_update_state(guest_fpu->fpstate); fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); /** * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state * * Must be invoked from KVM after a VMEXIT before enabling interrupts when * XFD write emulation is disabled. This is required because the guest can * freely modify XFD and the state at VMEXIT is not guaranteed to be the * same as the state on VMENTER. So software state has to be updated before * any operation which depends on it can take place. * * Note: It can be invoked unconditionally even when write emulation is * enabled for the price of a then pointless MSR read. */ void fpu_sync_guest_vmexit_xfd_state(void) { struct fpstate *fps = current->thread.fpu.fpstate; lockdep_assert_irqs_disabled(); if (fpu_state_size_dynamic()) { rdmsrl(MSR_IA32_XFD, fps->xfd); __this_cpu_write(xfd_state, fps->xfd); } } EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); #endif /* CONFIG_X86_64 */ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) { struct fpstate *guest_fps = guest_fpu->fpstate; struct fpu *fpu = &current->thread.fpu; struct fpstate *cur_fps = fpu->fpstate; fpregs_lock(); if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD)) save_fpregs_to_fpstate(fpu); /* Swap fpstate */ if (enter_guest) { fpu->__task_fpstate = cur_fps; fpu->fpstate = guest_fps; guest_fps->in_use = true; } else { guest_fps->in_use = false; fpu->fpstate = fpu->__task_fpstate; fpu->__task_fpstate = NULL; } cur_fps = fpu->fpstate; if (!cur_fps->is_confidential) { /* Includes XFD update */ restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE); } else { /* * XSTATE is restored by firmware from encrypted * memory. 
Make sure XFD state is correct while * running with guest fpstate */ xfd_update_state(cur_fps); } fpregs_mark_activate(); fpregs_unlock(); return 0; } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u64 xfeatures, u32 pkru) { struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, XSTATE_COPY_XSAVE); } else { memcpy(&ustate->fxsave, &kstate->regs.fxsave, sizeof(ustate->fxsave)); /* Make it restorable on a XSAVE enabled host */ ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; } } EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi); int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru) { struct fpstate *kstate = gfpu->fpstate; const union fpregs_state *ustate = buf; if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) { if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE) return -EINVAL; if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask) return -EINVAL; memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave)); return 0; } if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; /* * Nullify @vpkru to preserve its current value if PKRU's bit isn't set * in the header. KVM's odd ABI is to leave PKRU untouched in this * case (all other components are eventually re-initialized). */ if (!(ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU)) vpkru = NULL; return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); save_fpregs_to_fpstate(&current->thread.fpu); } __cpu_invalidate_fpregs_state(); /* Put sane initial values into the control registers. */ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); /* * Sync the FPU register state to current's memory register state when the * current task owns the FPU. The hardware register state is preserved. 
*/ void fpu_sync_fpstate(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); fpregs_lock(); trace_x86_fpu_before_save(fpu); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) save_fpregs_to_fpstate(fpu); trace_x86_fpu_after_save(fpu); fpregs_unlock(); } static inline unsigned int init_fpstate_copy_size(void) { if (!use_xsave()) return fpu_kernel_cfg.default_size; /* XSAVE(S) just needs the legacy and the xstate header part */ return sizeof(init_fpstate.regs.xsave); } static inline void fpstate_init_fxstate(struct fpstate *fpstate) { fpstate->regs.fxsave.cwd = 0x37f; fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT; } /* * Legacy x87 fpstate state init: */ static inline void fpstate_init_fstate(struct fpstate *fpstate) { fpstate->regs.fsave.cwd = 0xffff037fu; fpstate->regs.fsave.swd = 0xffff0000u; fpstate->regs.fsave.twd = 0xffffffffu; fpstate->regs.fsave.fos = 0xffff0000u; } /* * Used in two places: * 1) Early boot to setup init_fpstate for non XSAVE systems * 2) fpu_init_fpstate_user() which is invoked from KVM */ void fpstate_init_user(struct fpstate *fpstate) { if (!cpu_feature_enabled(X86_FEATURE_FPU)) { fpstate_init_soft(&fpstate->regs.soft); return; } xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures); if (cpu_feature_enabled(X86_FEATURE_FXSR)) fpstate_init_fxstate(fpstate); else fpstate_init_fstate(fpstate); } static void __fpstate_reset(struct fpstate *fpstate, u64 xfd) { /* Initialize sizes and feature masks */ fpstate->size = fpu_kernel_cfg.default_size; fpstate->user_size = fpu_user_cfg.default_size; fpstate->xfeatures = fpu_kernel_cfg.default_features; fpstate->user_xfeatures = fpu_user_cfg.default_features; fpstate->xfd = xfd; } void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; __fpstate_reset(fpu->fpstate, init_fpstate.xfd); /* Initialize the permission related info in fpu */ fpu->perm.__state_perm = fpu_kernel_cfg.default_features; fpu->perm.__state_size = fpu_kernel_cfg.default_size; fpu->perm.__user_state_size = fpu_user_cfg.default_size; /* Same defaults for guests */ fpu->guest_perm = fpu->perm; } static inline void fpu_inherit_perms(struct fpu *dst_fpu) { if (fpu_state_size_dynamic()) { struct fpu *src_fpu = &current->group_leader->thread.fpu; spin_lock_irq(&current->sighand->siglock); /* Fork also inherits the permissions of the parent */ dst_fpu->perm = src_fpu->perm; dst_fpu->guest_perm = src_fpu->guest_perm; spin_unlock_irq(&current->sighand->siglock); } } /* A passed ssp of zero will not cause any update */ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp) { #ifdef CONFIG_X86_USER_SHADOW_STACK struct cet_user_state *xstate; /* If ssp update is not needed. */ if (!ssp) return 0; xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave, XFEATURE_CET_USER); /* * If there is a non-zero ssp, then 'dst' must be configured with a shadow * stack and the fpu state should be up to date since it was just copied * from the parent in fpu_clone(). So there must be a valid non-init CET * state location in the buffer. */ if (WARN_ON_ONCE(!xstate)) return 1; xstate->user_ssp = (u64)ssp; #endif return 0; } /* Clone current's FPU state on fork */ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal, unsigned long ssp) { struct fpu *src_fpu = &current->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; /* The new task's FPU state cannot be valid in the hardware. 
*/ dst_fpu->last_cpu = -1; fpstate_reset(dst_fpu); if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; /* * Enforce reload for user space tasks and prevent kernel threads * from trying to save the FPU registers on context switch. */ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); /* * No FPU state inheritance for kernel threads and IO * worker threads. */ if (minimal) { /* Clear out the minimal state */ memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); return 0; } /* * If a new feature is added, ensure all dynamic features are * caller-saved from here! */ BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA); /* * Save the default portion of the current FPU state into the * clone. Assume all dynamic features to be defined as caller- * saved, which enables skipping both the expansion of fpstate * and the copying of any dynamic state. * * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because * copying is not valid when current uses non-default states. */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); save_fpregs_to_fpstate(dst_fpu); fpregs_unlock(); if (!(clone_flags & CLONE_THREAD)) fpu_inherit_perms(dst_fpu); /* * Children never inherit PASID state. * Force it to have its init value: */ if (use_xsave()) dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; /* * Update shadow stack pointer, in case it changed during clone. */ if (update_fpu_shstk(dst, ssp)) return 1; trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); return 0; } /* * Whitelist the FPU register state embedded into task_struct for hardened * usercopy. */ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); *size = fpu_kernel_cfg.default_size; } /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. * * This function can be used in cases where we know that * a state-restore is coming: either an explicit one, * or a reschedule. */ void fpu__drop(struct fpu *fpu) { preempt_disable(); if (fpu == &current->thread.fpu) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); fpregs_deactivate(fpu); } trace_x86_fpu_dropped(fpu); preempt_enable(); } /* * Clear FPU registers by setting them up from the init fpstate. * Caller must do fpregs_[un]lock() around it. */ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) { if (use_xsave()) os_xrstor(&init_fpstate, features_mask); else if (use_fxsr()) fxrstor(&init_fpstate.regs.fxsave); else frstor(&init_fpstate.regs.fsave); pkru_write_default(); } /* * Reset current->fpu memory state to the init values. */ static void fpu_reset_fpregs(void) { struct fpu *fpu = &current->thread.fpu; fpregs_lock(); __fpu_invalidate_fpregs_state(fpu); /* * This does not change the actual hardware registers. It just * resets the memory image and sets TIF_NEED_FPU_LOAD so a * subsequent return to usermode will reload the registers from the * task's memory image. * * Do not use fpstate_init() here. Just copy init_fpstate which has * the correct content already except for PKRU. * * PKRU handling does not rely on the xstate when restoring for * user space as PKRU is eagerly written in switch_to() and * flush_thread(). 
*/ memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); } /* * Reset current's user FPU states to the init states. current's * supervisor states, if any, are not modified by this function. The * caller guarantees that the XSTATE header in memory is intact. */ void fpu__clear_user_states(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); fpregs_lock(); if (!cpu_feature_enabled(X86_FEATURE_FPU)) { fpu_reset_fpregs(); fpregs_unlock(); return; } /* * Ensure that current's supervisor states are loaded into their * corresponding registers. */ if (xfeatures_mask_supervisor() && !fpregs_state_valid(fpu, smp_processor_id())) os_xrstor_supervisor(fpu->fpstate); /* Reset user states in registers. */ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); /* * Now all FPU registers have their desired values. Inform the FPU * state machine that current's FPU registers are in the hardware * registers. The memory image does not need to be updated because * any operation relying on it has to save the registers first when * current's FPU is marked active. */ fpregs_mark_activate(); fpregs_unlock(); } void fpu_flush_thread(void) { fpstate_reset(&current->thread.fpu); fpu_reset_fpregs(); } /* * Load FPU context before returning to userspace. */ void switch_fpu_return(void) { if (!static_cpu_has(X86_FEATURE_FPU)) return; fpregs_restore_userregs(); } EXPORT_SYMBOL_GPL(switch_fpu_return); void fpregs_lock_and_load(void) { /* * fpregs_lock() only disables preemption (mostly). So modifying state * in an interrupt could screw up some in progress fpregs operation. * Warn about it. */ WARN_ON_ONCE(!irq_fpu_usable()); WARN_ON_ONCE(current->flags & PF_KTHREAD); fpregs_lock(); fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); } #ifdef CONFIG_X86_DEBUG_FPU /* * If current FPU state according to its tracking (loaded FPU context on this * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is * loaded on return to userland. */ void fpregs_assert_state_consistent(void) { struct fpu *fpu = &current->thread.fpu; if (test_thread_flag(TIF_NEED_FPU_LOAD)) return; WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); } EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); #endif void fpregs_mark_activate(void) { struct fpu *fpu = &current->thread.fpu; fpregs_activate(fpu); fpu->last_cpu = smp_processor_id(); clear_thread_flag(TIF_NEED_FPU_LOAD); } /* * x87 math exception handling: */ int fpu__exception_code(struct fpu *fpu, int trap_nr) { int err; if (trap_nr == X86_TRAP_MF) { unsigned short cwd, swd; /* * (~cwd & swd) will mask out exceptions that are not set to unmasked * status. 0x3f is the exception bits in these regs, 0x200 is the * C1 reg you need in case of a stack fault, 0x040 is the stack * fault bit. We should only be taking one exception at a time, * so if this combination doesn't produce any single exception, * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to * fully reproduce the context of the exception. */ if (boot_cpu_has(X86_FEATURE_FXSR)) { cwd = fpu->fpstate->regs.fxsave.cwd; swd = fpu->fpstate->regs.fxsave.swd; } else { cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd; swd = (unsigned short)fpu->fpstate->regs.fsave.swd; } err = swd & ~cwd; } else { /* * The SIMD FPU exceptions are handled a little differently, as there * is only a single status/control register. 
Thus, to determine which * unmasked exception was caught we must mask the exception mask bits * at 0x1f80, and then use these to mask the exception bits at 0x3f. */ unsigned short mxcsr = MXCSR_DEFAULT; if (boot_cpu_has(X86_FEATURE_XMM)) mxcsr = fpu->fpstate->regs.fxsave.mxcsr; err = ~(mxcsr >> 7) & mxcsr; } if (err & 0x001) { /* Invalid op */ /* * swd & 0x240 == 0x040: Stack Underflow * swd & 0x240 == 0x240: Stack Overflow * User must clear the SF bit (0x40) if set */ return FPE_FLTINV; } else if (err & 0x004) { /* Divide by Zero */ return FPE_FLTDIV; } else if (err & 0x008) { /* Overflow */ return FPE_FLTOVF; } else if (err & 0x012) { /* Denormal, Underflow */ return FPE_FLTUND; } else if (err & 0x020) { /* Precision */ return FPE_FLTRES; } /* * If we're using IRQ 13, or supposedly even some trap * X86_TRAP_MF implementations, it's possible * we get a spurious trap, which is not an error. */ return 0; } /* * Initialize register state that may prevent from entering low-power idle. * This function will be invoked from the cpuidle driver only when needed. */ noinstr void fpu_idle_fpregs(void) { /* Note: AMX_TILE being enabled implies XGETBV1 support */ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) && (xfeatures_in_use() & XFEATURE_MASK_XTILE)) { tile_release(); __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } }
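/*
 * Usage sketch (illustration only, not part of this file): the usual pattern
 * for kernel code that wants to touch SIMD registers, built on the helpers
 * defined above. The function name and the plain-C fallback are assumptions
 * made for the example; irq_fpu_usable(), kernel_fpu_begin() and
 * kernel_fpu_end() are the real interfaces (kernel_fpu_begin() is the
 * <asm/fpu/api.h> wrapper around kernel_fpu_begin_mask()).
 */
#if 0	/* example only, never compiled */
static void demo_simd_copy(void *dst, const void *src, size_t len)
{
	/* Claiming the FPU is not always possible (NMI, nested kernel-FPU use). */
	if (!irq_fpu_usable()) {
		memcpy(dst, src, len);	/* plain C fallback path */
		return;
	}

	/*
	 * kernel_fpu_begin() saves the current owner's register state if
	 * needed, disables preemption and loads a sane MXCSR default; the
	 * region must not sleep and must end with kernel_fpu_end().
	 */
	kernel_fpu_begin();
	/* ... SSE/AVX accelerated copy would go here ... */
	kernel_fpu_end();
}
#endif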
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ext4 #if
!defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EXT4_H #include <linux/writeback.h> #include <linux/tracepoint.h> struct ext4_allocation_context; struct ext4_allocation_request; struct ext4_extent; struct ext4_prealloc_space; struct ext4_inode_info; struct mpage_da_data; struct ext4_map_blocks; struct extent_status; struct ext4_fsmap; struct partial_cluster; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define show_mballoc_flags(flags) __print_flags(flags, "|", \ { EXT4_MB_HINT_MERGE, "HINT_MERGE" }, \ { EXT4_MB_HINT_RESERVED, "HINT_RESV" }, \ { EXT4_MB_HINT_METADATA, "HINT_MDATA" }, \ { EXT4_MB_HINT_FIRST, "HINT_FIRST" }, \ { EXT4_MB_HINT_BEST, "HINT_BEST" }, \ { EXT4_MB_HINT_DATA, "HINT_DATA" }, \ { EXT4_MB_HINT_NOPREALLOC, "HINT_NOPREALLOC" }, \ { EXT4_MB_HINT_GROUP_ALLOC, "HINT_GRP_ALLOC" }, \ { EXT4_MB_HINT_GOAL_ONLY, "HINT_GOAL_ONLY" }, \ { EXT4_MB_HINT_TRY_GOAL, "HINT_TRY_GOAL" }, \ { EXT4_MB_DELALLOC_RESERVED, "DELALLOC_RESV" }, \ { EXT4_MB_STREAM_ALLOC, "STREAM_ALLOC" }, \ { EXT4_MB_USE_ROOT_BLOCKS, "USE_ROOT_BLKS" }, \ { EXT4_MB_USE_RESERVED, "USE_RESV" }, \ { EXT4_MB_STRICT_CHECK, "STRICT_CHECK" }) #define show_map_flags(flags) __print_flags(flags, "|", \ { EXT4_GET_BLOCKS_CREATE, "CREATE" }, \ { EXT4_GET_BLOCKS_UNWRIT_EXT, "UNWRIT" }, \ { EXT4_GET_BLOCKS_DELALLOC_RESERVE, "DELALLOC" }, \ { EXT4_GET_BLOCKS_PRE_IO, "PRE_IO" }, \ { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, "CONVERT_UNWRITTEN" }, \ { EXT4_GET_BLOCKS_ZERO, "ZERO" }, \ { EXT4_GET_BLOCKS_IO_SUBMIT, "IO_SUBMIT" }, \ { EXT4_EX_NOCACHE, "EX_NOCACHE" }) /* * __print_flags() requires that all enum values be wrapped in the * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace * ring buffer. 
*/ TRACE_DEFINE_ENUM(BH_New); TRACE_DEFINE_ENUM(BH_Mapped); TRACE_DEFINE_ENUM(BH_Unwritten); TRACE_DEFINE_ENUM(BH_Boundary); #define show_mflags(flags) __print_flags(flags, "", \ { EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_UNWRITTEN, "U" }, \ { EXT4_MAP_BOUNDARY, "B" }) #define show_free_flags(flags) __print_flags(flags, "|", \ { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ { EXT4_FREE_BLOCKS_FORGET, "FORGET" }, \ { EXT4_FREE_BLOCKS_VALIDATED, "VALIDATED" }, \ { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" }, \ { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\ { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) TRACE_DEFINE_ENUM(ES_WRITTEN_B); TRACE_DEFINE_ENUM(ES_UNWRITTEN_B); TRACE_DEFINE_ENUM(ES_DELAYED_B); TRACE_DEFINE_ENUM(ES_HOLE_B); TRACE_DEFINE_ENUM(ES_REFERENCED_B); #define show_extent_status(status) __print_flags(status, "", \ { EXTENT_STATUS_WRITTEN, "W" }, \ { EXTENT_STATUS_UNWRITTEN, "U" }, \ { EXTENT_STATUS_DELAYED, "D" }, \ { EXTENT_STATUS_HOLE, "H" }, \ { EXTENT_STATUS_REFERENCED, "R" }) #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); TRACE_DEFINE_ENUM(EXT4_FC_REASON_ENCRYPTED_FILENAME); TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); #define show_fc_reason(reason) \ __print_symbolic(reason, \ { EXT4_FC_REASON_XATTR, "XATTR"}, \ { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \ { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \ { EXT4_FC_REASON_NOMEM, "NO_MEM"}, \ { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \ { EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"}) TRACE_DEFINE_ENUM(CR_POWER2_ALIGNED); TRACE_DEFINE_ENUM(CR_GOAL_LEN_FAST); TRACE_DEFINE_ENUM(CR_BEST_AVAIL_LEN); TRACE_DEFINE_ENUM(CR_GOAL_LEN_SLOW); TRACE_DEFINE_ENUM(CR_ANY_FREE); #define show_criteria(cr) \ __print_symbolic(cr, \ { CR_POWER2_ALIGNED, "CR_POWER2_ALIGNED" }, \ { CR_GOAL_LEN_FAST, "CR_GOAL_LEN_FAST" }, \ { CR_BEST_AVAIL_LEN, "CR_BEST_AVAIL_LEN" }, \ { CR_GOAL_LEN_SLOW, "CR_GOAL_LEN_SLOW" }, \ { CR_ANY_FREE, "CR_ANY_FREE" }) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), TP_ARGS(inode, orig_ino), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, orig_ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u16, mode ) ), TP_fast_assign( __entry->orig_ino = orig_ino; __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->orig_ino, (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid) ); 
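/*
 * Usage sketch (illustration only, not part of this header): each
 * TRACE_EVENT(name, ...) above generates a trace_<name>() helper that the
 * filesystem code calls at the matching spot; the wrapper below is a made-up
 * caller for the ext4_other_inode_update_time event defined above (the real
 * call sites live in fs/ext4/). The event can then be enabled at runtime via
 * tracefs, e.g. /sys/kernel/tracing/events/ext4/ext4_other_inode_update_time/enable.
 */
#if 0	/* example only, never compiled */
static void demo_report_other_inode_update(struct inode *inode, ino_t orig_ino)
{
	/* Compiles down to a patched-out no-op unless the event is enabled. */
	trace_ext4_other_inode_update_time(inode, orig_ino);
}
#endif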
TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->blocks = inode->i_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, TP_PROTO(struct inode *dir, int mode), TP_ARGS(dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, TP_PROTO(struct inode *inode, struct inode *dir, int mode), TP_ARGS(inode, dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_evict_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, nlink ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->nlink = inode->i_nlink; ), TP_printk("dev %d,%d ino %lu nlink %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nlink) ); TRACE_EVENT(ext4_drop_inode, TP_PROTO(struct inode *inode, int drop), TP_ARGS(inode, drop), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, drop ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->drop = drop; ), TP_printk("dev %d,%d ino %lu drop %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->drop) ); TRACE_EVENT(ext4_nfs_commit_metadata, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; ), TP_printk("dev %d,%d ino %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mark_inode_dirty, TP_PROTO(struct inode *inode, unsigned long IP), TP_ARGS(inode, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field(unsigned long, ip ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ip = IP; ), TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); TRACE_EVENT(ext4_begin_ordered_truncate, TP_PROTO(struct inode *inode, loff_t new_size), TP_ARGS(inode, new_size), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, new_size ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->new_size = new_size; ), TP_printk("dev %d,%d ino %lu new_size %lld", 
MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->new_size) ); DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len), TP_ARGS(inode, pos, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; ), TP_printk("dev %d,%d ino %lu pos %lld len %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len) ); DEFINE_EVENT(ext4__write_begin, ext4_write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len), TP_ARGS(inode, pos, len) ); DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len), TP_ARGS(inode, pos, len) ); DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) __field( unsigned int, copied ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->copied = copied; ), TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->copied) ); DEFINE_EVENT(ext4__write_end, ext4_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); DEFINE_EVENT(ext4__write_end, ext4_da_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( long, nr_to_write ) __field( long, pages_skipped ) __field( loff_t, range_start ) __field( loff_t, range_end ) __field( pgoff_t, writeback_index ) __field( int, sync_mode ) __field( char, for_kupdate ) __field( char, range_cyclic ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; __entry->range_end = wbc->range_end; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->range_cyclic = wbc->range_cyclic; ), TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " "range_start %lld range_end %lld sync_mode %d " "for_kupdate %d range_cyclic %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, (unsigned long) __entry->writeback_index) ); TRACE_EVENT(ext4_da_write_pages, TP_PROTO(struct inode *inode, pgoff_t first_page, struct writeback_control *wbc), TP_ARGS(inode, first_page, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, first_page ) 
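	/*
	 * Illustrative note: first_page is the page-cache index where this
	 * delayed-allocation writeback pass starts; nr_to_write and
	 * sync_mode below are taken from the writeback_control.
	 */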
__field( long, nr_to_write ) __field( int, sync_mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->first_page = first_page; __entry->nr_to_write = wbc->nr_to_write; __entry->sync_mode = wbc->sync_mode; ), TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld " "sync_mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->first_page, __entry->nr_to_write, __entry->sync_mode) ); TRACE_EVENT(ext4_da_write_pages_extent, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map), TP_ARGS(inode, map), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, lblk ) __field( __u32, len ) __field( __u32, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = map->m_lblk; __entry->len = map->m_len; __entry->flags = map->m_flags; ), TP_printk("dev %d,%d ino %lu lblk %llu len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, show_mflags(__entry->flags)) ); TRACE_EVENT(ext4_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), TP_ARGS(inode, wbc, ret, pages_written), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) __field( int, pages_written ) __field( long, pages_skipped ) __field( pgoff_t, writeback_index ) __field( int, sync_mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ret = ret; __entry->pages_written = pages_written; __entry->pages_skipped = wbc->pages_skipped; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->sync_mode = wbc->sync_mode; ), TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld " "sync_mode %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->sync_mode, (unsigned long) __entry->writeback_index) ); DECLARE_EVENT_CLASS(ext4__folio_op, TP_PROTO(struct inode *inode, struct folio *folio), TP_ARGS(inode, folio), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->index = folio->index; ), TP_printk("dev %d,%d ino %lu folio_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index) ); DEFINE_EVENT(ext4__folio_op, ext4_read_folio, TP_PROTO(struct inode *inode, struct folio *folio), TP_ARGS(inode, folio) ); DEFINE_EVENT(ext4__folio_op, ext4_release_folio, TP_PROTO(struct inode *inode, struct folio *folio), TP_ARGS(inode, folio) ); DECLARE_EVENT_CLASS(ext4_invalidate_folio_op, TP_PROTO(struct folio *folio, size_t offset, size_t length), TP_ARGS(folio, offset, length), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) __field( size_t, offset ) __field( size_t, length ) ), TP_fast_assign( __entry->dev = folio->mapping->host->i_sb->s_dev; __entry->ino = folio->mapping->host->i_ino; __entry->index = folio->index; __entry->offset = offset; __entry->length = length; ), TP_printk("dev %d,%d ino %lu folio_index %lu offset %zu length %zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index, __entry->offset, __entry->length) ); DEFINE_EVENT(ext4_invalidate_folio_op, ext4_invalidate_folio, TP_PROTO(struct folio *folio, 
size_t offset, size_t length), TP_ARGS(folio, offset, length) ); DEFINE_EVENT(ext4_invalidate_folio_op, ext4_journalled_invalidate_folio, TP_PROTO(struct folio *folio, size_t offset, size_t length), TP_ARGS(folio, offset, length) ); TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), TP_ARGS(sb, blk, count), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u64, blk ) __field( __u64, count ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->blk = blk; __entry->count = count; ), TP_printk("dev %d,%d blk %llu count %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blk, __entry->count) ); DECLARE_EVENT_CLASS(ext4__mb_new_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u64, pa_lstart ) __field( __u32, pa_len ) ), TP_fast_assign( __entry->dev = ac->ac_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->pa_pstart = pa->pa_pstart; __entry->pa_lstart = pa->pa_lstart; __entry->pa_len = pa->pa_len; ), TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa) ); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa) ); TRACE_EVENT(ext4_mb_release_inode_pa, TP_PROTO(struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), TP_ARGS(pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( __u32, count ) ), TP_fast_assign( __entry->dev = pa->pa_inode->i_sb->s_dev; __entry->ino = pa->pa_inode->i_ino; __entry->block = block; __entry->count = count; ), TP_printk("dev %d,%d ino %lu block %llu count %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa), TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), TP_printk("dev %d,%d pstart %llu len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->pa_pstart, __entry->pa_len) ); TRACE_EVENT(ext4_discard_preallocations, TP_PROTO(struct inode *inode, unsigned int len), TP_ARGS(inode, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->len = len; ), TP_printk("dev %d,%d ino %lu len: %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->len) ); TRACE_EVENT(ext4_mb_discard_preallocations, TP_PROTO(struct super_block *sb, int needed), TP_ARGS(sb, needed), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, needed ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->needed = needed; ), TP_printk("dev %d,%d needed %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->needed) ); TRACE_EVENT(ext4_request_blocks, TP_PROTO(struct ext4_allocation_request *ar), TP_ARGS(ar), TP_STRUCT__entry( __field( 
dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) __field( __u32, logical ) __field( __u32, lleft ) __field( __u32, lright ) __field( __u64, goal ) __field( __u64, pleft ) __field( __u64, pright ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = ar->inode->i_sb->s_dev; __entry->ino = ar->inode->i_ino; __entry->len = ar->len; __entry->logical = ar->logical; __entry->goal = ar->goal; __entry->lleft = ar->lleft; __entry->lright = ar->lright; __entry->pleft = ar->pleft; __entry->pright = ar->pright; __entry->flags = ar->flags; ), TP_printk("dev %d,%d ino %lu flags %s len %u lblk %u goal %llu " "lleft %u lright %u pleft %llu pright %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_allocate_blocks, TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block), TP_ARGS(ar, block), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned int, len ) __field( __u32, logical ) __field( __u32, lleft ) __field( __u32, lright ) __field( __u64, goal ) __field( __u64, pleft ) __field( __u64, pright ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = ar->inode->i_sb->s_dev; __entry->ino = ar->inode->i_ino; __entry->block = block; __entry->len = ar->len; __entry->logical = ar->logical; __entry->goal = ar->goal; __entry->lleft = ar->lleft; __entry->lright = ar->lright; __entry->pleft = ar->pleft; __entry->pright = ar->pright; __entry->flags = ar->flags; ), TP_printk("dev %d,%d ino %lu flags %s len %u block %llu lblk %u " "goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->block, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_free_blocks, TP_PROTO(struct inode *inode, __u64 block, unsigned long count, int flags), TP_ARGS(inode, block, count, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned long, count ) __field( int, flags ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; __entry->flags = flags; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->block, __entry->count, show_free_flags(__entry->flags)) ); TRACE_EVENT(ext4_sync_file_enter, TP_PROTO(struct file *file, int datasync), TP_ARGS(file, datasync), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( int, datasync ) ), TP_fast_assign( struct dentry *dentry = file->f_path.dentry; __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->datasync = datasync; __entry->parent = d_inode(dentry->d_parent)->i_ino; ), TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) ); TRACE_EVENT(ext4_sync_file_exit, TP_PROTO(struct inode *inode, int ret), TP_ARGS(inode, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) ), 
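	/*
	 * Illustrative note: the generated probe is invoked roughly as
	 * trace_ext4_sync_file_exit(inode, ret) once fsync()/fdatasync()
	 * processing finishes, with 'ret' being the value handed back to
	 * the caller.
	 */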
TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) ); TRACE_EVENT(ext4_sync_fs, TP_PROTO(struct super_block *sb, int wait), TP_ARGS(sb, wait), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, wait ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->wait = wait; ), TP_printk("dev %d,%d wait %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->wait) ); TRACE_EVENT(ext4_alloc_da_blocks, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, data_blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; ), TP_printk("dev %d,%d ino %lu reserved_data_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->data_blocks) ); TRACE_EVENT(ext4_mballoc_alloc, TP_PROTO(struct ext4_allocation_context *ac), TP_ARGS(ac), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) __field( __u32, orig_group ) __field( int, orig_len ) __field( __u32, goal_logical ) __field( int, goal_start ) __field( __u32, goal_group ) __field( int, goal_len ) __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) __field( __u16, found ) __field( __u16, groups ) __field( __u16, buddy ) __field( __u16, flags ) __field( __u16, tail ) __field( __u8, cr ) ), TP_fast_assign( __entry->dev = ac->ac_inode->i_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; __entry->orig_group = ac->ac_o_ex.fe_group; __entry->orig_len = ac->ac_o_ex.fe_len; __entry->goal_logical = ac->ac_g_ex.fe_logical; __entry->goal_start = ac->ac_g_ex.fe_start; __entry->goal_group = ac->ac_g_ex.fe_group; __entry->goal_len = ac->ac_g_ex.fe_len; __entry->result_logical = ac->ac_f_ex.fe_logical; __entry->result_start = ac->ac_f_ex.fe_start; __entry->result_group = ac->ac_f_ex.fe_group; __entry->result_len = ac->ac_f_ex.fe_len; __entry->found = ac->ac_found; __entry->flags = ac->ac_flags; __entry->groups = ac->ac_groups_scanned; __entry->buddy = ac->ac_buddy; __entry->tail = ac->ac_tail; __entry->cr = ac->ac_criteria; ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " "result %u/%d/%u@%u blks %u grps %u cr %s flags %s " "tail %u broken %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->goal_group, __entry->goal_start, __entry->goal_len, __entry->goal_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical, __entry->found, __entry->groups, show_criteria(__entry->cr), show_mballoc_flags(__entry->flags), __entry->tail, __entry->buddy ? 
1 << __entry->buddy : 0) ); TRACE_EVENT(ext4_mballoc_prealloc, TP_PROTO(struct ext4_allocation_context *ac), TP_ARGS(ac), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) __field( __u32, orig_group ) __field( int, orig_len ) __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( __entry->dev = ac->ac_inode->i_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; __entry->orig_group = ac->ac_o_ex.fe_group; __entry->orig_len = ac->ac_o_ex.fe_len; __entry->result_logical = ac->ac_b_ex.fe_logical; __entry->result_start = ac->ac_b_ex.fe_start; __entry->result_group = ac->ac_b_ex.fe_group; __entry->result_len = ac->ac_b_ex.fe_len; ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical) ); DECLARE_EVENT_CLASS(ext4__mballoc, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ino = inode ? inode->i_ino : 0; __entry->result_start = start; __entry->result_group = group; __entry->result_len = len; ), TP_printk("dev %d,%d inode %lu extent %u/%d/%d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, __entry->result_len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len) ); TRACE_EVENT(ext4_forget, TP_PROTO(struct inode *inode, int is_metadata, __u64 block), TP_ARGS(inode, is_metadata, block), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( int, is_metadata ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->block = block; __entry->is_metadata = is_metadata; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->is_metadata, __entry->block) ); TRACE_EVENT(ext4_da_update_reserve_space, TP_PROTO(struct inode *inode, int used_blocks, int quota_claim), TP_ARGS(inode, used_blocks, quota_claim), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) __field( int, quota_claim ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->used_blocks = used_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->quota_claim = 
quota_claim; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " "reserved_data_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->quota_claim) ); TRACE_EVENT(ext4_da_reserve_space, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, TP_PROTO(struct inode *inode, int freed_blocks), TP_ARGS(inode, freed_blocks), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->freed_blocks = freed_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d " "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks) ); DECLARE_EVENT_CLASS(ext4__bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; ), TP_printk("dev %d,%d group %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); TRACE_EVENT(ext4_read_block_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group, bool prefetch), TP_ARGS(sb, group, prefetch), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) __field( bool, prefetch ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; __entry->prefetch = prefetch; ), TP_printk("dev %d,%d group %u prefetch %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group, __entry->prefetch) ); DECLARE_EVENT_CLASS(ext4__fallocate_mode, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, offset ) __field( loff_t, len ) __field( int, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld mode %s", MAJOR(__entry->dev), 
MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len, show_falloc_mode(__entry->mode)) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_fallocate_enter, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_punch_hole, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); TRACE_EVENT(ext4_fallocate_exit, TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret), TP_ARGS(inode, offset, max_blocks, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, blocks ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->blocks = max_blocks; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->blocks, __entry->ret) ); TRACE_EVENT(ext4_unlink_enter, TP_PROTO(struct inode *parent, struct dentry *dentry), TP_ARGS(parent, dentry), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( loff_t, size ) ), TP_fast_assign( __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->parent = parent->i_ino; __entry->size = d_inode(dentry)->i_size; ), TP_printk("dev %d,%d ino %lu size %lld parent %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->size, (unsigned long) __entry->parent) ); TRACE_EVENT(ext4_unlink_exit, TP_PROTO(struct dentry *dentry, int ret), TP_ARGS(dentry, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) ), TP_fast_assign( __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) ); DECLARE_EVENT_CLASS(ext4__truncate, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->blocks = inode->i_blocks; ), TP_printk("dev %d,%d ino %lu blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->blocks) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_enter, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); /* 'ux' is the unwritten extent. 
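 * The event below reports the requested map (m_lblk/m_len) together with
 * the unwritten extent's logical start, length and physical block.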
*/ TRACE_EVENT(ext4_ext_convert_to_initialized_enter, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, struct ext4_extent *ux), TP_ARGS(inode, map, ux), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, m_lblk ) __field( unsigned, m_len ) __field( ext4_lblk_t, u_lblk ) __field( unsigned, u_len ) __field( ext4_fsblk_t, u_pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_len = map->m_len; __entry->u_lblk = le32_to_cpu(ux->ee_block); __entry->u_len = ext4_ext_get_actual_len(ux); __entry->u_pblk = ext4_ext_pblock(ux); ), TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u " "u_pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->m_lblk, __entry->m_len, __entry->u_lblk, __entry->u_len, __entry->u_pblk) ); /* * 'ux' is the unwritten extent. * 'ix' is the initialized extent to which blocks are transferred. */ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, struct ext4_extent *ux, struct ext4_extent *ix), TP_ARGS(inode, map, ux, ix), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, m_lblk ) __field( unsigned, m_len ) __field( ext4_lblk_t, u_lblk ) __field( unsigned, u_len ) __field( ext4_fsblk_t, u_pblk ) __field( ext4_lblk_t, i_lblk ) __field( unsigned, i_len ) __field( ext4_fsblk_t, i_pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_len = map->m_len; __entry->u_lblk = le32_to_cpu(ux->ee_block); __entry->u_len = ext4_ext_get_actual_len(ux); __entry->u_pblk = ext4_ext_pblock(ux); __entry->i_lblk = le32_to_cpu(ix->ee_block); __entry->i_len = ext4_ext_get_actual_len(ix); __entry->i_pblk = ext4_ext_pblock(ix); ), TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u " "u_lblk %u u_len %u u_pblk %llu " "i_lblk %u i_len %u i_pblk %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->m_lblk, __entry->m_len, __entry->u_lblk, __entry->u_len, __entry->u_pblk, __entry->i_lblk, __entry->i_len, __entry->i_pblk) ); DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, unsigned int flags), TP_ARGS(inode, lblk, len, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; __entry->flags = flags; ), TP_printk("dev %d,%d ino %lu lblk %u len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, show_map_flags(__entry->flags)) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned len, unsigned flags), TP_ARGS(inode, lblk, len, flags) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned len, unsigned flags), TP_ARGS(inode, lblk, len, flags) ); DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, flags ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) 
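	/*
	 * Illustrative note: 'flags' holds the caller's lookup flags, while
	 * pblk, lblk, len and mflags below mirror the resulting
	 * ext4_map_blocks and 'ret' the return value.
	 */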
__field( unsigned int, len ) __field( unsigned int, mflags ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->flags = flags; __entry->pblk = map->m_pblk; __entry->lblk = map->m_lblk; __entry->len = map->m_len; __entry->mflags = map->m_flags; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu flags %s lblk %u pblk %llu len %u " "mflags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_map_flags(__entry->flags), __entry->lblk, __entry->pblk, __entry->len, show_mflags(__entry->mflags), __entry->ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret) ); TRACE_EVENT(ext4_ext_load_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk), TP_ARGS(inode, lblk, pblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->pblk) ); TRACE_EVENT(ext4_load_inode, TP_PROTO(struct super_block *sb, unsigned long ino), TP_ARGS(sb, ino), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ino = ino; ), TP_printk("dev %d,%d ino %ld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_journal_start_sb, TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks, int revoke_creds, int type, unsigned long IP), TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, type, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned long, ip ) __field( int, blocks ) __field( int, rsv_blocks ) __field( int, revoke_creds ) __field( int, type ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; __entry->rsv_blocks = rsv_blocks; __entry->revoke_creds = revoke_creds; __entry->type = type; ), TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d," " type %d, caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks, __entry->revoke_creds, __entry->type, (void *)__entry->ip) ); TRACE_EVENT(ext4_journal_start_inode, TP_PROTO(struct inode *inode, int blocks, int rsv_blocks, int revoke_creds, int type, unsigned long IP), TP_ARGS(inode, blocks, rsv_blocks, revoke_creds, type, IP), TP_STRUCT__entry( __field( unsigned long, ino ) __field( dev_t, dev ) __field( unsigned long, ip ) __field( int, blocks ) __field( int, rsv_blocks ) __field( int, revoke_creds ) __field( int, type ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; __entry->rsv_blocks = rsv_blocks; __entry->revoke_creds = revoke_creds; __entry->type = type; __entry->ino = inode->i_ino; ), TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d," " type %d, ino %lu, caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks, __entry->revoke_creds, __entry->type, __entry->ino, (void *)__entry->ip) ); TRACE_EVENT(ext4_journal_start_reserved, 
TP_PROTO(struct super_block *sb, int blocks, unsigned long IP), TP_ARGS(sb, blocks, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field(unsigned long, ip ) __field( int, blocks ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; ), TP_printk("dev %d,%d blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, (void *)__entry->ip) ); DECLARE_EVENT_CLASS(ext4__trim, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len), TP_STRUCT__entry( __field( int, dev_major ) __field( int, dev_minor ) __field( __u32, group ) __field( int, start ) __field( int, len ) ), TP_fast_assign( __entry->dev_major = MAJOR(sb->s_dev); __entry->dev_minor = MINOR(sb->s_dev); __entry->group = group; __entry->start = start; __entry->len = len; ), TP_printk("dev %d,%d group %u, start %d, len %d", __entry->dev_major, __entry->dev_minor, __entry->group, __entry->start, __entry->len) ); DEFINE_EVENT(ext4__trim, ext4_trim_extent, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len) ); DEFINE_EVENT(ext4__trim, ext4_trim_all_free, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len) ); TRACE_EVENT(ext4_ext_handle_unwritten_extents, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int flags, unsigned int allocated, ext4_fsblk_t newblock), TP_ARGS(inode, map, flags, allocated, newblock), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, flags ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) __field( unsigned int, len ) __field( unsigned int, allocated ) __field( ext4_fsblk_t, newblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->flags = flags; __entry->lblk = map->m_lblk; __entry->pblk = map->m_pblk; __entry->len = map->m_len; __entry->allocated = allocated; __entry->newblk = newblock; ), TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %s " "allocated %d newblock %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, __entry->len, show_map_flags(__entry->flags), (unsigned int) __entry->allocated, (unsigned long long) __entry->newblk) ); TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret), TP_ARGS(sb, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned int, flags ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) __field( unsigned int, len ) __field( int, ret ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->flags = map->m_flags; __entry->lblk = map->m_lblk; __entry->pblk = map->m_pblk; __entry->len = map->m_len; __entry->ret = ret; ), TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lblk, (unsigned long long) __entry->pblk, __entry->len, show_mflags(__entry->flags), __entry->ret) ); TRACE_EVENT(ext4_ext_show_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, unsigned short len), TP_ARGS(inode, lblk, pblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) __field( unsigned short, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; 
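		/*
		 * Each record describes a single extent: logical start (lblk),
		 * physical start (pblk) and length (len).
		 */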
__entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, (unsigned short) __entry->len) ); TRACE_EVENT(ext4_remove_blocks, TP_PROTO(struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_fsblk_t to, struct partial_cluster *pc), TP_ARGS(inode, ex, from, to, pc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, from ) __field( ext4_lblk_t, to ) __field( ext4_fsblk_t, ee_pblk ) __field( ext4_lblk_t, ee_lblk ) __field( unsigned short, ee_len ) __field( ext4_fsblk_t, pc_pclu ) __field( ext4_lblk_t, pc_lblk ) __field( int, pc_state) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->from = from; __entry->to = to; __entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_len = ext4_ext_get_actual_len(ex); __entry->pc_pclu = pc->pclu; __entry->pc_lblk = pc->lblk; __entry->pc_state = pc->state; ), TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" "from %u to %u partial [pclu %lld lblk %u state %d]", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, (unsigned) __entry->from, (unsigned) __entry->to, (long long) __entry->pc_pclu, (unsigned int) __entry->pc_lblk, (int) __entry->pc_state) ); TRACE_EVENT(ext4_ext_rm_leaf, TP_PROTO(struct inode *inode, ext4_lblk_t start, struct ext4_extent *ex, struct partial_cluster *pc), TP_ARGS(inode, start, ex, pc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, ee_lblk ) __field( ext4_fsblk_t, ee_pblk ) __field( short, ee_len ) __field( ext4_fsblk_t, pc_pclu ) __field( ext4_lblk_t, pc_lblk ) __field( int, pc_state) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_len = ext4_ext_get_actual_len(ex); __entry->pc_pclu = pc->pclu; __entry->pc_lblk = pc->lblk; __entry->pc_state = pc->state; ), TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" "partial [pclu %lld lblk %u state %d]", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, (long long) __entry->pc_pclu, (unsigned int) __entry->pc_lblk, (int) __entry->pc_state) ); TRACE_EVENT(ext4_ext_rm_idx, TP_PROTO(struct inode *inode, ext4_fsblk_t pblk), TP_ARGS(inode, pblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; ), TP_printk("dev %d,%d ino %lu index_pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long long) __entry->pblk) ); TRACE_EVENT(ext4_ext_remove_space, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, int depth), TP_ARGS(inode, start, end, depth), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; 
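		/*
		 * [start, end] is the logical block range whose extents are
		 * being removed; depth is the extent tree depth reported by
		 * the caller.
		 */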
__entry->end = end; __entry->depth = depth; ), TP_printk("dev %d,%d ino %lu since %u end %u depth %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth) ); TRACE_EVENT(ext4_ext_remove_space_done, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, int depth, struct partial_cluster *pc, __le16 eh_entries), TP_ARGS(inode, start, end, depth, pc, eh_entries), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) __field( ext4_fsblk_t, pc_pclu ) __field( ext4_lblk_t, pc_lblk ) __field( int, pc_state ) __field( unsigned short, eh_entries ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->end = end; __entry->depth = depth; __entry->pc_pclu = pc->pclu; __entry->pc_lblk = pc->lblk; __entry->pc_state = pc->state; __entry->eh_entries = le16_to_cpu(eh_entries); ), TP_printk("dev %d,%d ino %lu since %u end %u depth %d " "partial [pclu %lld lblk %u state %d] " "remaining_entries %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth, (long long) __entry->pc_pclu, (unsigned int) __entry->pc_lblk, (int) __entry->pc_state, (unsigned short) __entry->eh_entries) ); DECLARE_EVENT_CLASS(ext4__es_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status)) ); DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es) ); DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es) ); TRACE_EVENT(ext4_es_remove_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, lblk ) __field( loff_t, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu es [%lld/%lld)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_es_find_extent_range_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_find_extent_range_exit, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( 
ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status)) ); TRACE_EVENT(ext4_es_lookup_extent_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_lookup_extent_exit, TP_PROTO(struct inode *inode, struct extent_status *es, int found), TP_ARGS(inode, es, found), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) __field( int, found ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); __entry->found = found; ), TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->found, __entry->lblk, __entry->len, __entry->found ? __entry->pblk : 0, show_extent_status(__entry->found ? 
__entry->status : 0)) ); DECLARE_EVENT_CLASS(ext4__es_shrink_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_to_scan ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_to_scan = nr_to_scan; __entry->cache_cnt = cache_cnt; ), TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_to_scan, __entry->cache_cnt) ); DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt) ); DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt) ); TRACE_EVENT(ext4_es_shrink_scan_exit, TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt), TP_ARGS(sb, nr_shrunk, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->cache_cnt = cache_cnt; ), TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, __entry->cache_cnt) ); TRACE_EVENT(ext4_collapse_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len), TP_ARGS(inode, offset, len), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(loff_t, offset) __field(loff_t, len) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len) ); TRACE_EVENT(ext4_insert_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len), TP_ARGS(inode, offset, len), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(loff_t, offset) __field(loff_t, len) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len) ); TRACE_EVENT(ext4_es_shrink, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, int nr_skipped, int retried), TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( unsigned long long, scan_time ) __field( int, nr_skipped ) __field( int, retried ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->scan_time = div_u64(scan_time, 1000); __entry->nr_skipped = nr_skipped; __entry->retried = retried; ), TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu " "nr_skipped %d retried %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, __entry->scan_time, __entry->nr_skipped, __entry->retried) ); TRACE_EVENT(ext4_es_insert_delayed_block, TP_PROTO(struct inode *inode, struct extent_status *es, bool allocated), TP_ARGS(inode, es, allocated), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) __field( bool, allocated ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; 
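		/*
		 * Illustrative note: 'allocated' mirrors the caller's argument
		 * and flags whether the delayed block's cluster is already
		 * backed by allocated blocks (bigalloc accounting).
		 */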
__entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); __entry->allocated = allocated; ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s " "allocated %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status), __entry->allocated) ); /* fsmap traces */ DECLARE_EVENT_CLASS(ext4_fsmap_class, TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, u64 owner), TP_ARGS(sb, keydev, agno, bno, len, owner), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(u32, agno) __field(u64, bno) __field(u64, len) __field(u64, owner) ), TP_fast_assign( __entry->dev = sb->s_bdev->bd_dev; __entry->keydev = new_decode_dev(keydev); __entry->agno = agno; __entry->bno = bno; __entry->len = len; __entry->owner = owner; ), TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->agno, __entry->bno, __entry->len, __entry->owner) ) #define DEFINE_FSMAP_EVENT(name) \ DEFINE_EVENT(ext4_fsmap_class, name, \ TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \ u64 owner), \ TP_ARGS(sb, keydev, agno, bno, len, owner)) DEFINE_FSMAP_EVENT(ext4_fsmap_low_key); DEFINE_FSMAP_EVENT(ext4_fsmap_high_key); DEFINE_FSMAP_EVENT(ext4_fsmap_mapping); DECLARE_EVENT_CLASS(ext4_getfsmap_class, TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), TP_ARGS(sb, fsmap), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(u64, block) __field(u64, len) __field(u64, owner) __field(u64, flags) ), TP_fast_assign( __entry->dev = sb->s_bdev->bd_dev; __entry->keydev = new_decode_dev(fsmap->fmr_device); __entry->block = fsmap->fmr_physical; __entry->len = fsmap->fmr_length; __entry->owner = fsmap->fmr_owner; __entry->flags = fsmap->fmr_flags; ), TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld flags 0x%llx\n", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->block, __entry->len, __entry->owner, __entry->flags) ) #define DEFINE_GETFSMAP_EVENT(name) \ DEFINE_EVENT(ext4_getfsmap_class, name, \ TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), \ TP_ARGS(sb, fsmap)) DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping); TRACE_EVENT(ext4_shutdown, TP_PROTO(struct super_block *sb, unsigned long flags), TP_ARGS(sb, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned, flags ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->flags = flags; ), TP_printk("dev %d,%d flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->flags) ); TRACE_EVENT(ext4_error, TP_PROTO(struct super_block *sb, const char *function, unsigned int line), TP_ARGS(sb, function, line), TP_STRUCT__entry( __field( dev_t, dev ) __field( const char *, function ) __field( unsigned, line ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->function = function; __entry->line = line; ), TP_printk("dev %d,%d function %s line %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->function, __entry->line) ); TRACE_EVENT(ext4_prefetch_bitmaps, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_group_t next, unsigned int prefetch_ios), TP_ARGS(sb, group, next, prefetch_ios), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) __field( __u32, next ) __field( __u32, ios ) ), 
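	/*
	 * Illustrative note: the generated probe is
	 * trace_ext4_prefetch_bitmaps(sb, group, next, prefetch_ios); 'next'
	 * is the next group to prefetch and 'ios' a running count of
	 * prefetch reads issued.
	 */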
TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; __entry->next = next; __entry->ios = prefetch_ios; ), TP_printk("dev %d,%d group %u next %u ios %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group, __entry->next, __entry->ios) ); TRACE_EVENT(ext4_lazy_itable_init, TP_PROTO(struct super_block *sb, ext4_group_t group), TP_ARGS(sb, group), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; ), TP_printk("dev %d,%d group %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ); TRACE_EVENT(ext4_fc_replay_scan, TP_PROTO(struct super_block *sb, int error, int off), TP_ARGS(sb, error, off), TP_STRUCT__entry( __field(dev_t, dev) __field(int, error) __field(int, off) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->error = error; __entry->off = off; ), TP_printk("dev %d,%d error %d, off %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->error, __entry->off) ); TRACE_EVENT(ext4_fc_replay, TP_PROTO(struct super_block *sb, int tag, int ino, int priv1, int priv2), TP_ARGS(sb, tag, ino, priv1, priv2), TP_STRUCT__entry( __field(dev_t, dev) __field(int, tag) __field(int, ino) __field(int, priv1) __field(int, priv2) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->tag = tag; __entry->ino = ino; __entry->priv1 = priv1; __entry->priv2 = priv2; ), TP_printk("dev %d,%d: tag %d, ino %d, data1 %d, data2 %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tag, __entry->ino, __entry->priv1, __entry->priv2) ); TRACE_EVENT(ext4_fc_commit_start, TP_PROTO(struct super_block *sb, tid_t commit_tid), TP_ARGS(sb, commit_tid), TP_STRUCT__entry( __field(dev_t, dev) __field(tid_t, tid) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->tid = commit_tid; ), TP_printk("dev %d,%d tid %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid) ); TRACE_EVENT(ext4_fc_commit_stop, TP_PROTO(struct super_block *sb, int nblks, int reason, tid_t commit_tid), TP_ARGS(sb, nblks, reason, commit_tid), TP_STRUCT__entry( __field(dev_t, dev) __field(int, nblks) __field(int, reason) __field(int, num_fc) __field(int, num_fc_ineligible) __field(int, nblks_agg) __field(tid_t, tid) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nblks = nblks; __entry->reason = reason; __entry->num_fc = EXT4_SB(sb)->s_fc_stats.fc_num_commits; __entry->num_fc_ineligible = EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; __entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks; __entry->tid = commit_tid; ), TP_printk("dev %d,%d nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d, tid %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nblks, __entry->reason, __entry->num_fc, __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid) ); #define FC_REASON_NAME_STAT(reason) \ show_fc_reason(reason), \ __entry->fc_ineligible_rc[reason] TRACE_EVENT(ext4_fc_stats, TP_PROTO(struct super_block *sb), TP_ARGS(sb), TP_STRUCT__entry( __field(dev_t, dev) __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) __field(unsigned long, fc_commits) __field(unsigned long, fc_ineligible_commits) __field(unsigned long, fc_numblks) ), TP_fast_assign( int i; __entry->dev = sb->s_dev; for (i = 0; i < EXT4_FC_REASON_MAX; i++) { __entry->fc_ineligible_rc[i] = EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; } __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; __entry->fc_ineligible_commits = EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; ), TP_printk("dev 
%d,%d fc ineligible reasons:\n" "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u" "num_commits:%lu, ineligible: %lu, numblks: %lu", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), FC_REASON_NAME_STAT(EXT4_FC_REASON_ENCRYPTED_FILENAME), __entry->fc_commits, __entry->fc_ineligible_commits, __entry->fc_numblks) ); DECLARE_EVENT_CLASS(ext4_fc_track_dentry, TP_PROTO(handle_t *handle, struct inode *inode, struct dentry *dentry, int ret), TP_ARGS(handle, inode, dentry, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(tid_t, t_tid) __field(ino_t, i_ino) __field(tid_t, i_sync_tid) __field(int, error) ), TP_fast_assign( struct ext4_inode_info *ei = EXT4_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->t_tid = handle->h_transaction->t_tid; __entry->i_ino = inode->i_ino; __entry->i_sync_tid = ei->i_sync_tid; __entry->error = ret; ), TP_printk("dev %d,%d, t_tid %u, ino %lu, i_sync_tid %u, error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, __entry->error ) ); #define DEFINE_EVENT_CLASS_DENTRY(__type) \ DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type, \ TP_PROTO(handle_t *handle, struct inode *inode, \ struct dentry *dentry, int ret), \ TP_ARGS(handle, inode, dentry, ret) \ ) DEFINE_EVENT_CLASS_DENTRY(create); DEFINE_EVENT_CLASS_DENTRY(link); DEFINE_EVENT_CLASS_DENTRY(unlink); TRACE_EVENT(ext4_fc_track_inode, TP_PROTO(handle_t *handle, struct inode *inode, int ret), TP_ARGS(handle, inode, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(tid_t, t_tid) __field(ino_t, i_ino) __field(tid_t, i_sync_tid) __field(int, error) ), TP_fast_assign( struct ext4_inode_info *ei = EXT4_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->t_tid = handle->h_transaction->t_tid; __entry->i_ino = inode->i_ino; __entry->i_sync_tid = ei->i_sync_tid; __entry->error = ret; ), TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, __entry->error) ); TRACE_EVENT(ext4_fc_track_range, TP_PROTO(handle_t *handle, struct inode *inode, long start, long end, int ret), TP_ARGS(handle, inode, start, end, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(tid_t, t_tid) __field(ino_t, i_ino) __field(tid_t, i_sync_tid) __field(long, start) __field(long, end) __field(int, error) ), TP_fast_assign( struct ext4_inode_info *ei = EXT4_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->t_tid = handle->h_transaction->t_tid; __entry->i_ino = inode->i_ino; __entry->i_sync_tid = ei->i_sync_tid; __entry->start = start; __entry->end = end; __entry->error = ret; ), TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d, start %ld, end %ld", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, __entry->error, __entry->start, __entry->end) ); TRACE_EVENT(ext4_fc_cleanup, TP_PROTO(journal_t *journal, int full, tid_t tid), TP_ARGS(journal, full, tid), TP_STRUCT__entry( __field(dev_t, dev) __field(int, j_fc_off) __field(int, full) __field(tid_t, tid) ), TP_fast_assign( struct 
super_block *sb = journal->j_private; __entry->dev = sb->s_dev; __entry->j_fc_off = journal->j_fc_off; __entry->full = full; __entry->tid = tid; ), TP_printk("dev %d,%d, j_fc_off %d, full %d, tid %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->j_fc_off, __entry->full, __entry->tid) ); TRACE_EVENT(ext4_update_sb, TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, unsigned int flags), TP_ARGS(sb, fsblk, flags), TP_STRUCT__entry( __field(dev_t, dev) __field(ext4_fsblk_t, fsblk) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->fsblk = fsblk; __entry->flags = flags; ), TP_printk("dev %d,%d fsblk %llu flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fsblk, __entry->flags) ); #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
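The TRACE_EVENT() and DECLARE_EVENT_CLASS() definitions above only describe each trace record's fields and output format; including the header also generates a trace_<name>() helper for call sites. A minimal sketch of such a call site follows, assuming the kernel's standard tracepoint machinery; the wrapper function name is hypothetical and not part of ext4.

#include <linux/fs.h>
#include <trace/events/ext4.h>

/* Hypothetical call site: fire the ext4_shutdown tracepoint defined above,
 * passing the same arguments as its TP_PROTO(). */
static void example_report_shutdown(struct super_block *sb, unsigned long flags)
{
	trace_ext4_shutdown(sb, flags);
}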
// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * * super.c */ /* * This file implements code to read the superblock, read and initialise * in-memory structures at mount time, and all the VFS glue code to register * the filesystem.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/blkdev.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/vfs.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/module.h> #include <linux/magic.h> #include <linux/xattr.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" #include "decompressor.h" #include "xattr.h" static struct file_system_type squashfs_fs_type; static const struct super_operations squashfs_super_ops; enum Opt_errors { Opt_errors_continue, Opt_errors_panic, }; enum squashfs_param { Opt_errors, Opt_threads, }; struct squashfs_mount_opts { enum Opt_errors errors; const struct squashfs_decompressor_thread_ops *thread_ops; int thread_num; }; static const struct constant_table squashfs_param_errors[] = { {"continue", Opt_errors_continue }, {"panic", Opt_errors_panic }, {} }; static const struct fs_parameter_spec squashfs_fs_parameters[] = { fsparam_enum("errors", Opt_errors, squashfs_param_errors), fsparam_string("threads", Opt_threads), {} }; static int squashfs_parse_param_threads_str(const char *str, struct squashfs_mount_opts *opts) { #ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT if (strcmp(str, "single") == 0) { opts->thread_ops = &squashfs_decompressor_single; return 0; } if (strcmp(str, "multi") == 0) { opts->thread_ops = &squashfs_decompressor_multi; return 0; } if (strcmp(str, "percpu") == 0) { opts->thread_ops = &squashfs_decompressor_percpu; return 0; } #endif return -EINVAL; } static int squashfs_parse_param_threads_num(const char *str, struct squashfs_mount_opts *opts) { #ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS int ret; unsigned long num; ret = kstrtoul(str, 0, &num); if (ret != 0) return -EINVAL; if (num > 1) { opts->thread_ops = &squashfs_decompressor_multi; if (num > opts->thread_ops->max_decompressors()) return -EINVAL; opts->thread_num = (int)num; return 0; } #ifdef CONFIG_SQUASHFS_DECOMP_SINGLE if (num == 1) { opts->thread_ops = &squashfs_decompressor_single; opts->thread_num = 1; return 0; } #endif #endif /* !CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS */ return -EINVAL; } static int squashfs_parse_param_threads(const char *str, struct squashfs_mount_opts *opts) { int ret = squashfs_parse_param_threads_str(str, opts); if (ret == 0) return ret; return squashfs_parse_param_threads_num(str, opts); } static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct squashfs_mount_opts *opts = fc->fs_private; struct fs_parse_result result; int opt; opt = fs_parse(fc, squashfs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_errors: opts->errors = result.uint_32; break; case Opt_threads: if (squashfs_parse_param_threads(param->string, opts) != 0) return -EINVAL; break; default: return -EINVAL; } return 0; } static const struct squashfs_decompressor *supported_squashfs_filesystem( struct fs_context *fc, short major, short minor, short id) { const struct squashfs_decompressor *decompressor; if (major < SQUASHFS_MAJOR) { errorf(fc, "Major/Minor mismatch, older Squashfs %d.%d " "filesystems are unsupported", major, minor); return NULL; } else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) { errorf(fc, "Major/Minor mismatch, trying to mount newer " "%d.%d filesystem", major, minor); errorf(fc, "Please update your kernel"); return NULL; } decompressor = squashfs_lookup_decompressor(id); if 
(!decompressor->supported) { errorf(fc, "Filesystem uses \"%s\" compression. This is not supported", decompressor->name); return NULL; } return decompressor; } static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct squashfs_mount_opts *opts = fc->fs_private; struct squashfs_sb_info *msblk; struct squashfs_super_block *sblk = NULL; struct inode *root; long long root_inode; unsigned short flags; unsigned int fragments; u64 lookup_table_start, xattr_id_table_start, next_table; int err; TRACE("Entered squashfs_fill_superblock\n"); sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); return -ENOMEM; } msblk = sb->s_fs_info; msblk->thread_ops = opts->thread_ops; msblk->panic_on_errors = (opts->errors == Opt_errors_panic); msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); /* * msblk->bytes_used is checked in squashfs_read_table to ensure reads * are not beyond filesystem end. But as we're using * squashfs_read_table here to read the superblock (including the value * of bytes_used) we need to set it to an initial sensible dummy value */ msblk->bytes_used = sizeof(*sblk); sblk = squashfs_read_table(sb, SQUASHFS_START, sizeof(*sblk)); if (IS_ERR(sblk)) { errorf(fc, "unable to read squashfs_super_block"); err = PTR_ERR(sblk); sblk = NULL; goto failed_mount; } err = -EINVAL; /* Check it is a SQUASHFS superblock */ sb->s_magic = le32_to_cpu(sblk->s_magic); if (sb->s_magic != SQUASHFS_MAGIC) { if (!(fc->sb_flags & SB_SILENT)) errorf(fc, "Can't find a SQUASHFS superblock on %pg", sb->s_bdev); goto failed_mount; } if (opts->thread_num == 0) { msblk->max_thread_num = msblk->thread_ops->max_decompressors(); } else { msblk->max_thread_num = opts->thread_num; } /* Check the MAJOR & MINOR versions and lookup compression type */ msblk->decompressor = supported_squashfs_filesystem( fc, le16_to_cpu(sblk->s_major), le16_to_cpu(sblk->s_minor), le16_to_cpu(sblk->compression)); if (msblk->decompressor == NULL) goto failed_mount; /* Check the filesystem does not extend beyond the end of the block device */ msblk->bytes_used = le64_to_cpu(sblk->bytes_used); if (msblk->bytes_used < 0 || msblk->bytes_used > bdev_nr_bytes(sb->s_bdev)) goto failed_mount; /* Check block size for sanity */ msblk->block_size = le32_to_cpu(sblk->block_size); if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE) goto insanity; /* * Check the system page size is not larger than the filesystem * block size (by default 128K). This is currently not supported. */ if (PAGE_SIZE > msblk->block_size) { errorf(fc, "Page size > filesystem block size (%d). 
This is " "currently not supported!", msblk->block_size); goto failed_mount; } /* Check block log for sanity */ msblk->block_log = le16_to_cpu(sblk->block_log); if (msblk->block_log > SQUASHFS_FILE_MAX_LOG) goto failed_mount; /* Check that block_size and block_log match */ if (msblk->block_size != (1 << msblk->block_log)) goto insanity; /* Check the root inode for sanity */ root_inode = le64_to_cpu(sblk->root_inode); if (SQUASHFS_INODE_OFFSET(root_inode) > SQUASHFS_METADATA_SIZE) goto insanity; msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); msblk->fragments = le32_to_cpu(sblk->fragments); msblk->ids = le16_to_cpu(sblk->no_ids); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(flags) ? "un" : ""); TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(flags) ? "un" : ""); TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", msblk->ids); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); TRACE("sblk->fragment_table_start %llx\n", (u64) le64_to_cpu(sblk->fragment_table_start)); TRACE("sblk->id_table_start %llx\n", (u64) le64_to_cpu(sblk->id_table_start)); sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_min = 0; sb->s_time_max = U32_MAX; sb->s_flags |= SB_RDONLY; sb->s_op = &squashfs_super_ops; err = -ENOMEM; msblk->block_cache = squashfs_cache_init("metadata", SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); if (msblk->block_cache == NULL) goto failed_mount; /* Allocate read_page block */ msblk->read_page = squashfs_cache_init("data", msblk->max_thread_num, msblk->block_size); if (msblk->read_page == NULL) { errorf(fc, "Failed to allocate read_page block"); goto failed_mount; } if (msblk->devblksize == PAGE_SIZE) { struct inode *cache = new_inode(sb); if (cache == NULL) goto failed_mount; set_nlink(cache, 1); cache->i_size = OFFSET_MAX; mapping_set_gfp_mask(cache->i_mapping, GFP_NOFS); msblk->cache_mapping = cache->i_mapping; } msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; goto insanity; } /* Handle xattrs */ sb->s_xattr = squashfs_xattr_handlers; xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start); if (xattr_id_table_start == SQUASHFS_INVALID_BLK) { next_table = msblk->bytes_used; goto allocate_id_index_table; } /* Allocate and read xattr id lookup table */ msblk->xattr_id_table = squashfs_read_xattr_id_table(sb, xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids); if (IS_ERR(msblk->xattr_id_table)) { errorf(fc, "unable to read xattr id index table"); err = PTR_ERR(msblk->xattr_id_table); msblk->xattr_id_table = NULL; if (err != -ENOTSUPP) goto failed_mount; } next_table = msblk->xattr_table; allocate_id_index_table: /* Allocate and read id index table */ msblk->id_table = squashfs_read_id_index_table(sb, le64_to_cpu(sblk->id_table_start), next_table, msblk->ids); if (IS_ERR(msblk->id_table)) { errorf(fc, "unable to read id index table"); err = PTR_ERR(msblk->id_table); msblk->id_table = NULL; goto failed_mount; } next_table = le64_to_cpu(msblk->id_table[0]); /* Handle inode lookup table */ lookup_table_start 
= le64_to_cpu(sblk->lookup_table_start); if (lookup_table_start == SQUASHFS_INVALID_BLK) goto handle_fragments; /* Allocate and read inode lookup table */ msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb, lookup_table_start, next_table, msblk->inodes); if (IS_ERR(msblk->inode_lookup_table)) { errorf(fc, "unable to read inode lookup table"); err = PTR_ERR(msblk->inode_lookup_table); msblk->inode_lookup_table = NULL; goto failed_mount; } next_table = le64_to_cpu(msblk->inode_lookup_table[0]); sb->s_export_op = &squashfs_export_ops; handle_fragments: fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; msblk->fragment_cache = squashfs_cache_init("fragment", SQUASHFS_CACHED_FRAGMENTS, msblk->block_size); if (msblk->fragment_cache == NULL) { err = -ENOMEM; goto failed_mount; } /* Allocate and read fragment index table */ msblk->fragment_index = squashfs_read_fragment_index_table(sb, le64_to_cpu(sblk->fragment_table_start), next_table, fragments); if (IS_ERR(msblk->fragment_index)) { errorf(fc, "unable to read fragment index table"); err = PTR_ERR(msblk->fragment_index); msblk->fragment_index = NULL; goto failed_mount; } next_table = le64_to_cpu(msblk->fragment_index[0]); check_directory_table: /* Sanity check directory_table */ if (msblk->directory_table > next_table) { err = -EINVAL; goto insanity; } /* Sanity check inode_table */ if (msblk->inode_table >= msblk->directory_table) { err = -EINVAL; goto insanity; } /* allocate root */ root = new_inode(sb); if (!root) { err = -ENOMEM; goto failed_mount; } err = squashfs_read_inode(root, root_inode); if (err) { make_bad_inode(root); iput(root); goto failed_mount; } insert_inode_hash(root); sb->s_root = d_make_root(root); if (sb->s_root == NULL) { ERROR("Root inode create failed\n"); err = -ENOMEM; goto failed_mount; } TRACE("Leaving squashfs_fill_super\n"); kfree(sblk); return 0; insanity: errorf(fc, "squashfs image failed sanity check"); failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); if (msblk->cache_mapping) iput(msblk->cache_mapping->host); msblk->thread_ops->destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); kfree(msblk->xattr_id_table); kfree(sb->s_fs_info); sb->s_fs_info = NULL; kfree(sblk); return err; } static int squashfs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, squashfs_fill_super); } static int squashfs_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct squashfs_sb_info *msblk = sb->s_fs_info; struct squashfs_mount_opts *opts = fc->fs_private; sync_filesystem(fc->root->d_sb); fc->sb_flags |= SB_RDONLY; msblk->panic_on_errors = (opts->errors == Opt_errors_panic); return 0; } static void squashfs_free_fs_context(struct fs_context *fc) { kfree(fc->fs_private); } static const struct fs_context_operations squashfs_context_ops = { .get_tree = squashfs_get_tree, .free = squashfs_free_fs_context, .parse_param = squashfs_parse_param, .reconfigure = squashfs_reconfigure, }; static int squashfs_show_options(struct seq_file *s, struct dentry *root) { struct super_block *sb = root->d_sb; struct squashfs_sb_info *msblk = sb->s_fs_info; if (msblk->panic_on_errors) seq_puts(s, ",errors=panic"); else seq_puts(s, ",errors=continue"); #ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT if (msblk->thread_ops == &squashfs_decompressor_single) { seq_puts(s, ",threads=single"); return 0; } if (msblk->thread_ops == 
&squashfs_decompressor_percpu) { seq_puts(s, ",threads=percpu"); return 0; } #endif #ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS seq_printf(s, ",threads=%d", msblk->max_thread_num); #endif return 0; } static int squashfs_init_fs_context(struct fs_context *fc) { struct squashfs_mount_opts *opts; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) return -ENOMEM; #ifdef CONFIG_SQUASHFS_DECOMP_SINGLE opts->thread_ops = &squashfs_decompressor_single; #elif defined(CONFIG_SQUASHFS_DECOMP_MULTI) opts->thread_ops = &squashfs_decompressor_multi; #elif defined(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) opts->thread_ops = &squashfs_decompressor_percpu; #else #error "fail: unknown squashfs decompression thread mode?" #endif opts->thread_num = 0; fc->fs_private = opts; fc->ops = &squashfs_context_ops; return 0; } static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info; u64 id = huge_encode_dev(dentry->d_sb->s_bdev->bd_dev); TRACE("Entered squashfs_statfs\n"); buf->f_type = SQUASHFS_MAGIC; buf->f_bsize = msblk->block_size; buf->f_blocks = ((msblk->bytes_used - 1) >> msblk->block_log) + 1; buf->f_bfree = buf->f_bavail = 0; buf->f_files = msblk->inodes; buf->f_ffree = 0; buf->f_namelen = SQUASHFS_NAME_LEN; buf->f_fsid = u64_to_fsid(id); return 0; } static void squashfs_put_super(struct super_block *sb) { if (sb->s_fs_info) { struct squashfs_sb_info *sbi = sb->s_fs_info; squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); if (sbi->cache_mapping) iput(sbi->cache_mapping->host); sbi->thread_ops->destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); kfree(sbi->inode_lookup_table); kfree(sbi->xattr_id_table); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } } static struct kmem_cache *squashfs_inode_cachep; static void init_once(void *foo) { struct squashfs_inode_info *ei = foo; inode_init_once(&ei->vfs_inode); } static int __init init_inodecache(void) { squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", sizeof(struct squashfs_inode_info), 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, init_once); return squashfs_inode_cachep ? 0 : -ENOMEM; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(squashfs_inode_cachep); } static int __init init_squashfs_fs(void) { int err = init_inodecache(); if (err) return err; err = register_filesystem(&squashfs_fs_type); if (err) { destroy_inodecache(); return err; } pr_info("version 4.0 (2009/01/31) Phillip Lougher\n"); return 0; } static void __exit exit_squashfs_fs(void) { unregister_filesystem(&squashfs_fs_type); destroy_inodecache(); } static struct inode *squashfs_alloc_inode(struct super_block *sb) { struct squashfs_inode_info *ei = alloc_inode_sb(sb, squashfs_inode_cachep, GFP_KERNEL); return ei ? 
&ei->vfs_inode : NULL; } static void squashfs_free_inode(struct inode *inode) { kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); } static struct file_system_type squashfs_fs_type = { .owner = THIS_MODULE, .name = "squashfs", .init_fs_context = squashfs_init_fs_context, .parameters = squashfs_fs_parameters, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("squashfs"); static const struct super_operations squashfs_super_ops = { .alloc_inode = squashfs_alloc_inode, .free_inode = squashfs_free_inode, .statfs = squashfs_statfs, .put_super = squashfs_put_super, .show_options = squashfs_show_options, }; module_init(init_squashfs_fs); module_exit(exit_squashfs_fs); MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem"); MODULE_AUTHOR("Phillip Lougher <phillip@squashfs.org.uk>"); MODULE_LICENSE("GPL");
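The mount-option handling in the squashfs code above follows the generic fs_context parameter-table pattern. The sketch below restates that fsparam_enum()/fsparam_string() idiom for a made-up filesystem; every name prefixed with examplefs_ or Opt_example_ is illustrative only and not part of squashfs.

#include <linux/fs_context.h>
#include <linux/fs_parser.h>

/* Illustrative parameter table in the style of squashfs_fs_parameters. */
enum examplefs_param { Opt_example_mode, Opt_example_label };

static const struct constant_table examplefs_param_mode[] = {
	{ "fast", 0 },
	{ "safe", 1 },
	{}
};

static const struct fs_parameter_spec examplefs_fs_parameters[] = {
	fsparam_enum("mode", Opt_example_mode, examplefs_param_mode),
	fsparam_string("label", Opt_example_label),
	{}
};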
/* file-mmu.c: ramfs MMU-based file operations * * Resizable simple ram filesystem for Linux. * * Copyright (C) 2000 Linus Torvalds. * 2000 Transmeta Corp. * * Usage limits added by David Gibson, Linuxcare Australia. * This file is released under the GPL. */ /* * NOTE! This filesystem is probably most useful * not as a real filesystem, but as an example of * how virtual filesystems can be written. * * It doesn't get much simpler than this. Consider * that this file implements the full semantics of * a POSIX-compliant read-write filesystem. * * Note in particular how the filesystem does not * need to implement any data structures of its own * to keep track of the virtual data: using the VFS * caches is sufficient. */ #include <linux/fs.h> #include <linux/mm.h> #include <linux/ramfs.h> #include <linux/sched.h> #include "internal.h" static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); } const struct file_operations ramfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = noop_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, .get_unmapped_area = ramfs_mmu_get_unmapped_area, }; const struct inode_operations ramfs_file_inode_operations = { .setattr = simple_setattr, .getattr = simple_getattr, };
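As its header comment notes, ramfs relies almost entirely on generic VFS helpers; the two operation tables defined in file-mmu.c are simply attached to regular-file inodes elsewhere in ramfs. A rough sketch of that wiring is shown below; the helper name is hypothetical, and the real assignment happens in ramfs's inode-creation path.

#include <linux/fs.h>
#include "internal.h"	/* declarations of the ramfs operation tables above */

/* Sketch only: attach the tables from file-mmu.c to a regular-file inode,
 * roughly as ramfs does when it allocates an S_IFREG inode. */
static void example_init_ramfs_file_inode(struct inode *inode)
{
	inode->i_op = &ramfs_file_inode_operations;
	inode->i_fop = &ramfs_file_operations;
}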
// SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <linux/icmp.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/udp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("xt_tcp"); MODULE_ALIAS("xt_udp"); MODULE_ALIAS("ipt_udp"); MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); MODULE_ALIAS("ipt_icmp"); MODULE_ALIAS("ip6t_icmp6"); /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) { return (port >= min && port <= max) ^ invert; } static bool tcp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, unsigned int optlen, bool invert, bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ const u_int8_t *op; u_int8_t _opt[60 - sizeof(struct tcphdr)]; unsigned int i; pr_debug("finding option\n"); if (!optlen) return invert; /* If we don't have the whole header, drop packet. */ op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr), optlen, _opt); if (op == NULL) { *hotdrop = true; return false; } for (i = 0; i < optlen; ) { if (op[i] == option) return !invert; if (op[i] < 2) i++; else i += op[i+1]?:1; } return invert; } static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct tcphdr *th; struct tcphdr _tcph; const struct xt_tcp *tcpinfo = par->matchinfo; if (par->fragoff != 0) { /* To quote Alan: Don't allow a fragment of TCP 8 bytes in. Nobody normal causes this. Its a cracker trying to break in by doing a flag overwrite to pass the direction checks. */ if (par->fragoff == 1) { pr_debug("Dropping evil TCP offset=1 frag.\n"); par->hotdrop = true; } /* Must not be a fragment. */ return false; } th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop.
*/ pr_debug("Dropping evil TCP offset=0 tinygram.\n"); par->hotdrop = true; return false; } if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(th->source), !!(tcpinfo->invflags & XT_TCP_INV_SRCPT))) return false; if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS, (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp)) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { par->hotdrop = true; return false; } if (!tcp_find_option(tcpinfo->option, skb, par->thoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, &par->hotdrop)) return false; } return true; } static int tcp_mt_check(const struct xt_mtchk_param *par) { const struct xt_tcp *tcpinfo = par->matchinfo; /* Must specify no unknown invflags */ return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0; } static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct udphdr *uh; struct udphdr _udph; const struct xt_udp *udpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph); if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil UDP tinygram.\n"); par->hotdrop = true; return false; } return port_match(udpinfo->spts[0], udpinfo->spts[1], ntohs(uh->source), !!(udpinfo->invflags & XT_UDP_INV_SRCPT)) && port_match(udpinfo->dpts[0], udpinfo->dpts[1], ntohs(uh->dest), !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } static int udp_mt_check(const struct xt_mtchk_param *par) { const struct xt_udp *udpinfo = par->matchinfo; /* Must specify no unknown invflags */ return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0; } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ static bool type_code_in_range(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code) { return type == test_type && code >= min_code && code <= max_code; } static bool icmp_type_code_match(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code, bool invert) { return (test_type == 0xFF || type_code_in_range(test_type, min_code, max_code, type, code)) ^ invert; } static bool icmp6_type_code_match(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code, bool invert) { return type_code_in_range(test_type, min_code, max_code, type, code) ^ invert; } static bool icmp_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (!ic) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ par->hotdrop = true; return false; } return icmp_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], ic->type, ic->code, !!(icmpinfo->invflags & IPT_ICMP_INV)); } static bool icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (!ic) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. 
*/ par->hotdrop = true; return false; } return icmp6_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], ic->icmp6_type, ic->icmp6_code, !!(icmpinfo->invflags & IP6T_ICMP_INV)); } static int icmp_checkentry(const struct xt_mtchk_param *par) { const struct ipt_icmp *icmpinfo = par->matchinfo; return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; } static int icmp6_checkentry(const struct xt_mtchk_param *par) { const struct ip6t_icmp *icmpinfo = par->matchinfo; return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; } static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", .family = NFPROTO_IPV4, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, { .name = "tcp", .family = NFPROTO_IPV6, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, { .name = "udp", .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDP, .me = THIS_MODULE, }, { .name = "udp", .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDP, .me = THIS_MODULE, }, { .name = "udplite", .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, { .name = "udplite", .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, .family = NFPROTO_IPV4, .me = THIS_MODULE, }, { .name = "icmp6", .match = icmp6_match, .matchsize = sizeof(struct ip6t_icmp), .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, .family = NFPROTO_IPV6, .me = THIS_MODULE, }, }; static int __init tcpudp_mt_init(void) { return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg)); } static void __exit tcpudp_mt_exit(void) { xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg)); } module_init(tcpudp_mt_init); module_exit(tcpudp_mt_exit);
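The port test used by tcp_mt() and udp_mt() above reduces to "in range XOR invert". The standalone userspace sketch below mirrors port_match() purely to illustrate that truth table; it is not part of the xt_tcpudp module.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace mirror of port_match(): true when port lies in [min, max],
 * with the result flipped when 'invert' is set. */
static bool example_port_match(uint16_t min, uint16_t max, uint16_t port, bool invert)
{
	return (port >= min && port <= max) ^ invert;
}

int main(void)
{
	printf("%d\n", example_port_match(1000, 2000, 1500, false));	/* 1: in range */
	printf("%d\n", example_port_match(1000, 2000, 1500, true));	/* 0: in range, inverted */
	printf("%d\n", example_port_match(1000, 2000, 80, true));	/* 1: out of range, inverted */
	return 0;
}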
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2006, Johannes Berg <johannes@sipsolutions.net> */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/leds.h> #include "ieee80211_i.h" #define MAC80211_BLINK_DELAY 50 /* ms */ static inline void ieee80211_led_rx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS if (!atomic_read(&local->rx_led_active)) return; led_trigger_blink_oneshot(&local->rx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0); #endif } static inline void ieee80211_led_tx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS if (!atomic_read(&local->tx_led_active)) return; led_trigger_blink_oneshot(&local->tx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0); #endif } #ifdef CONFIG_MAC80211_LEDS void ieee80211_led_assoc(struct ieee80211_local *local, bool associated); void ieee80211_led_radio(struct ieee80211_local *local, bool enabled); void ieee80211_alloc_led_names(struct ieee80211_local *local); void ieee80211_free_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off); #else static inline void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { } static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } static inline void ieee80211_alloc_led_names(struct ieee80211_local *local) { } static inline void ieee80211_free_led_names(struct ieee80211_local *local) { } static inline void ieee80211_led_init(struct ieee80211_local *local) { } static inline void ieee80211_led_exit(struct ieee80211_local *local) { } static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off) { } #endif static inline void ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->tx_bytes += bytes; #endif } static inline void ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->rx_bytes += bytes; #endif }
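Because this header provides empty inline stubs when CONFIG_MAC80211_LEDS is not set, callers elsewhere in mac80211 can invoke the LED helpers unconditionally. A hedged sketch of such a call site follows; the function name is hypothetical and not taken from mac80211.

#include "ieee80211_i.h"
#include "led.h"

/* Hypothetical RX-path hook: these calls compile to no-ops when
 * CONFIG_MAC80211_LEDS is disabled, so the caller needs no #ifdef. */
static void example_rx_led_notify(struct ieee80211_local *local, int bytes)
{
	ieee80211_led_rx(local);
	ieee80211_tpt_led_trig_rx(local, bytes);
}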
// SPDX-License-Identifier: GPL-2.0 /* * Copyright 2012 Google, Inc. * * Foreground allocator code: allocate buckets from freelist, and allocate in * sector granularity from writepoints. * * bch2_bucket_alloc() allocates a single bucket from a specific device. * * bch2_bucket_alloc_set() allocates one or more buckets from different devices * in a given filesystem. */ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" #include "btree_iter.h" #include "btree_update.h" #include "btree_gc.h" #include "buckets.h" #include "buckets_waiting_for_journal.h" #include "clock.h" #include "debug.h" #include "disk_groups.h" #include "ec.h" #include "error.h" #include "io_write.h" #include "journal.h" #include "movinggc.h" #include "nocow_locking.h" #include "trace.h" #include <linux/math64.h> #include <linux/rculist.h> #include <linux/rcupdate.h> static void bch2_trans_mutex_lock_norelock(struct btree_trans *trans, struct mutex *lock) { if (!mutex_trylock(lock)) { bch2_trans_unlock(trans); mutex_lock(lock); } } const char * const bch2_watermarks[] = { #define x(t) #t, BCH_WATERMARKS() #undef x NULL }; /* * Open buckets represent a bucket that's currently being allocated from. They * serve two purposes: * * - They track buckets that have been partially allocated, allowing for * sub-bucket sized allocations - they're used by the sector allocator below * * - They provide a reference to the buckets they own that mark and sweep GC * can find, until the new allocation has a pointer to it inserted into the * btree * * When allocating some space with the sector allocator, the allocation comes * with a reference to an open bucket - the caller is required to put that * reference _after_ doing the index update that makes its allocation reachable.
*/ void bch2_reset_alloc_cursors(struct bch_fs *c) { rcu_read_lock(); for_each_member_device_rcu(c, ca, NULL) ca->alloc_cursor = 0; rcu_read_unlock(); } static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob) { open_bucket_idx_t idx = ob - c->open_buckets; open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket); ob->hash = *slot; *slot = idx; } static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob) { open_bucket_idx_t idx = ob - c->open_buckets; open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket); while (*slot != idx) { BUG_ON(!*slot); slot = &c->open_buckets[*slot].hash; } *slot = ob->hash; ob->hash = 0; } void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); if (ob->ec) { ec_stripe_new_put(c, ob->ec, STRIPE_REF_io); return; } percpu_down_read(&c->mark_lock); spin_lock(&ob->lock); ob->valid = false; ob->data_type = 0; spin_unlock(&ob->lock); percpu_up_read(&c->mark_lock); spin_lock(&c->freelist_lock); bch2_open_bucket_hash_remove(c, ob); ob->freelist = c->open_buckets_freelist; c->open_buckets_freelist = ob - c->open_buckets; c->open_buckets_nr_free++; ca->nr_open_buckets--; spin_unlock(&c->freelist_lock); closure_wake_up(&c->open_buckets_wait); } void bch2_open_bucket_write_error(struct bch_fs *c, struct open_buckets *obs, unsigned dev) { struct open_bucket *ob; unsigned i; open_bucket_for_each(c, obs, ob, i) if (ob->dev == dev && ob->ec) bch2_ec_bucket_cancel(c, ob); } static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) { struct open_bucket *ob; BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free); ob = c->open_buckets + c->open_buckets_freelist; c->open_buckets_freelist = ob->freelist; atomic_set(&ob->pin, 1); ob->data_type = 0; c->open_buckets_nr_free--; return ob; } static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) { BUG_ON(c->open_buckets_partial_nr >= ARRAY_SIZE(c->open_buckets_partial)); spin_lock(&c->freelist_lock); ob->on_partial_list = true; c->open_buckets_partial[c->open_buckets_partial_nr++] = ob - c->open_buckets; spin_unlock(&c->freelist_lock); closure_wake_up(&c->open_buckets_wait); closure_wake_up(&c->freelist_wait); } /* _only_ for allocating the journal on a new device: */ long bch2_bucket_alloc_new_fs(struct bch_dev *ca) { while (ca->new_fs_bucket_idx < ca->mi.nbuckets) { u64 b = ca->new_fs_bucket_idx++; if (!is_superblock_bucket(ca, b) && (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse))) return b; } return -1; } static inline unsigned open_buckets_reserved(enum bch_watermark watermark) { switch (watermark) { case BCH_WATERMARK_interior_updates: return 0; case BCH_WATERMARK_reclaim: return OPEN_BUCKETS_COUNT / 6; case BCH_WATERMARK_btree: case BCH_WATERMARK_btree_copygc: return OPEN_BUCKETS_COUNT / 4; case BCH_WATERMARK_copygc: return OPEN_BUCKETS_COUNT / 3; default: return OPEN_BUCKETS_COUNT / 2; } } static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, u64 bucket, enum bch_watermark watermark, const struct bch_alloc_v4 *a, struct bucket_alloc_state *s, struct closure *cl) { struct open_bucket *ob; if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) { s->skipped_nouse++; return NULL; } if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { s->skipped_open++; return NULL; } if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) { 
s->skipped_need_journal_commit++; return NULL; } if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) { s->skipped_nocow++; return NULL; } spin_lock(&c->freelist_lock); if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) { if (cl) closure_wait(&c->open_buckets_wait, cl); track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true); spin_unlock(&c->freelist_lock); return ERR_PTR(-BCH_ERR_open_buckets_empty); } /* Recheck under lock: */ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { spin_unlock(&c->freelist_lock); s->skipped_open++; return NULL; } ob = bch2_open_bucket_alloc(c); spin_lock(&ob->lock); ob->valid = true; ob->sectors_free = ca->mi.bucket_size; ob->dev = ca->dev_idx; ob->gen = a->gen; ob->bucket = bucket; spin_unlock(&ob->lock); ca->nr_open_buckets++; bch2_open_bucket_hash_add(c, ob); track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false); track_event_change(&c->times[BCH_TIME_blocked_allocate], false); spin_unlock(&c->freelist_lock); return ob; } static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, enum bch_watermark watermark, u64 free_entry, struct bucket_alloc_state *s, struct bkey_s_c freespace_k, struct closure *cl) { struct bch_fs *c = trans->c; struct btree_iter iter = { NULL }; struct bkey_s_c k; struct open_bucket *ob; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; u64 b = free_entry & ~(~0ULL << 56); unsigned genbits = free_entry >> 56; struct printbuf buf = PRINTBUF; int ret; if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) { prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n" " freespace key ", ca->mi.first_bucket, ca->mi.nbuckets); bch2_bkey_val_to_text(&buf, c, freespace_k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; } k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED); ret = bkey_err(k); if (ret) { ob = ERR_PTR(ret); goto err; } a = bch2_alloc_to_v4(k, &a_convert); if (a->data_type != BCH_DATA_free) { if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { ob = NULL; goto err; } prt_printf(&buf, "non free bucket in freespace btree\n" " freespace key "); bch2_bkey_val_to_text(&buf, c, freespace_k); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; } if (genbits != (alloc_freespace_genbits(*a) >> 56) && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" " freespace key ", genbits, alloc_freespace_genbits(*a) >> 56); bch2_bkey_val_to_text(&buf, c, freespace_k); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; } if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) { struct bch_backpointer bp; struct bpos bp_pos = POS_MIN; ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1, &bp_pos, &bp, BTREE_ITER_NOPRESERVE); if (ret) { ob = ERR_PTR(ret); goto err; } if (!bkey_eq(bp_pos, POS_MAX)) { /* * Bucket may have data in it - we don't call * bc2h_trans_inconnsistent() because fsck hasn't * finished yet */ ob = NULL; goto err; } } ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); if (!ob) set_btree_iter_dontneed(&iter); err: if (iter.path) set_btree_iter_dontneed(&iter); bch2_trans_iter_exit(trans, &iter); 
printbuf_exit(&buf); return ob; } /* * This path is for before the freespace btree is initialized: * * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock & * journal buckets - journal buckets will be < ca->new_fs_bucket_idx */ static noinline struct open_bucket * bch2_bucket_alloc_early(struct btree_trans *trans, struct bch_dev *ca, enum bch_watermark watermark, struct bucket_alloc_state *s, struct closure *cl) { struct btree_iter iter, citer; struct bkey_s_c k, ck; struct open_bucket *ob = NULL; u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor)); u64 alloc_cursor = alloc_start; int ret; /* * Scan with an uncached iterator to avoid polluting the key cache. An * uncached iter will return a cached key if one exists, but if not * there is no other underlying protection for the associated key cache * slot. To avoid racing bucket allocations, look up the cached key slot * of any likely allocation candidate before attempting to proceed with * the allocation. This provides proper exclusion on the associated * bucket. */ again: for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), BTREE_ITER_SLOTS, k, ret) { struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets))) break; if (ca->new_fs_bucket_idx && is_superblock_bucket(ca, k.k->p.offset)) continue; a = bch2_alloc_to_v4(k, &a_convert); if (a->data_type != BCH_DATA_free) continue; /* now check the cached key to serialize concurrent allocs of the bucket */ ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED); ret = bkey_err(ck); if (ret) break; a = bch2_alloc_to_v4(ck, &a_convert); if (a->data_type != BCH_DATA_free) goto next; s->buckets_seen++; ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); next: set_btree_iter_dontneed(&citer); bch2_trans_iter_exit(trans, &citer); if (ob) break; } bch2_trans_iter_exit(trans, &iter); alloc_cursor = iter.pos.offset; ca->alloc_cursor = alloc_cursor; if (!ob && ret) ob = ERR_PTR(ret); if (!ob && alloc_start > first_bucket) { alloc_cursor = alloc_start = first_bucket; goto again; } return ob; } static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, struct bch_dev *ca, enum bch_watermark watermark, struct bucket_alloc_state *s, struct closure *cl) { struct btree_iter iter; struct bkey_s_c k; struct open_bucket *ob = NULL; u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor)); u64 alloc_cursor = alloc_start; int ret; BUG_ON(ca->new_fs_bucket_idx); again: for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, POS(ca->dev_idx, alloc_cursor), 0, k, ret) { if (k.k->p.inode != ca->dev_idx) break; for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); alloc_cursor < k.k->p.offset; alloc_cursor++) { ret = btree_trans_too_many_iters(trans); if (ret) { ob = ERR_PTR(ret); break; } s->buckets_seen++; ob = try_alloc_bucket(trans, ca, watermark, alloc_cursor, s, k, cl); if (ob) { set_btree_iter_dontneed(&iter); break; } } if (ob || ret) break; } bch2_trans_iter_exit(trans, &iter); ca->alloc_cursor = alloc_cursor; if (!ob && ret) ob = ERR_PTR(ret); if (!ob && alloc_start > ca->mi.first_bucket) { alloc_cursor = alloc_start = ca->mi.first_bucket; goto again; } return ob; } /** * bch2_bucket_alloc_trans - allocate a single bucket from a specific device * @trans: transaction object * @ca: device to allocate from * 
@watermark: how important is this allocation? * @cl: if not NULL, closure to be used to wait if buckets not available * @usage: for secondarily also returning the current device usage * * Returns: an open_bucket on success, or an ERR_PTR() on failure. */ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct bch_dev *ca, enum bch_watermark watermark, struct closure *cl, struct bch_dev_usage *usage) { struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; bool freespace = READ_ONCE(ca->mi.freespace_initialized); u64 avail; struct bucket_alloc_state s = { 0 }; bool waiting = false; again: bch2_dev_usage_read_fast(ca, usage); avail = dev_buckets_free(ca, *usage, watermark); if (usage->d[BCH_DATA_need_discard].buckets > avail) bch2_do_discards(c); if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) bch2_do_gc_gens(c); if (should_invalidate_buckets(ca, *usage)) bch2_do_invalidates(c); if (!avail) { if (cl && !waiting) { closure_wait(&c->freelist_wait, cl); waiting = true; goto again; } track_event_change(&c->times[BCH_TIME_blocked_allocate], true); ob = ERR_PTR(-BCH_ERR_freelist_empty); goto err; } if (waiting) closure_wake_up(&c->freelist_wait); alloc: ob = likely(freespace) ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl) : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl); if (s.skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); if (!ob && freespace && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { freespace = false; goto alloc; } err: if (!ob) ob = ERR_PTR(-BCH_ERR_no_buckets_found); if (!IS_ERR(ob)) trace_and_count(c, bucket_alloc, ca, bch2_watermarks[watermark], ob->bucket, usage->d[BCH_DATA_free].buckets, avail, bch2_copygc_wait_amount(c), c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), &s, cl == NULL, ""); else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart)) trace_and_count(c, bucket_alloc_fail, ca, bch2_watermarks[watermark], 0, usage->d[BCH_DATA_free].buckets, avail, bch2_copygc_wait_amount(c), c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), &s, cl == NULL, bch2_err_str(PTR_ERR(ob))); return ob; } struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, enum bch_watermark watermark, struct closure *cl) { struct bch_dev_usage usage; struct open_bucket *ob; bch2_trans_do(c, NULL, NULL, 0, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage))); return ob; } static int __dev_stripe_cmp(struct dev_stripe_state *stripe, unsigned l, unsigned r) { return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - (stripe->next_alloc[l] < stripe->next_alloc[r])); } #define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, struct dev_stripe_state *stripe, struct bch_devs_mask *devs) { struct dev_alloc_list ret = { .nr = 0 }; unsigned i; for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) ret.devs[ret.nr++] = i; bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); return ret; } static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca, struct dev_stripe_state *stripe, struct bch_dev_usage *usage) { u64 *v = stripe->next_alloc + ca->dev_idx; u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal); u64 free_space_inv = free_space ? 
div64_u64(1ULL << 48, free_space) : 1ULL << 48; u64 scale = *v / 4; if (*v + free_space_inv >= *v) *v += free_space_inv; else *v = U64_MAX; for (v = stripe->next_alloc; v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) *v = *v < scale ? 0 : *v - scale; } void bch2_dev_stripe_increment(struct bch_dev *ca, struct dev_stripe_state *stripe) { struct bch_dev_usage usage; bch2_dev_usage_read_fast(ca, &usage); bch2_dev_stripe_increment_inlined(ca, stripe, &usage); } static int add_new_bucket(struct bch_fs *c, struct open_buckets *ptrs, struct bch_devs_mask *devs_may_alloc, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, unsigned flags, struct open_bucket *ob) { unsigned durability = bch_dev_bkey_exists(c, ob->dev)->mi.durability; BUG_ON(*nr_effective >= nr_replicas); __clear_bit(ob->dev, devs_may_alloc->d); *nr_effective += durability; *have_cache |= !durability; ob_push(c, ptrs, ob); if (*nr_effective >= nr_replicas) return 1; if (ob->ec) return 1; return 0; } int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct open_buckets *ptrs, struct dev_stripe_state *stripe, struct bch_devs_mask *devs_may_alloc, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, unsigned flags, enum bch_data_type data_type, enum bch_watermark watermark, struct closure *cl) { struct bch_fs *c = trans->c; struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); unsigned dev; struct bch_dev *ca; int ret = -BCH_ERR_insufficient_devices; unsigned i; BUG_ON(*nr_effective >= nr_replicas); for (i = 0; i < devs_sorted.nr; i++) { struct bch_dev_usage usage; struct open_bucket *ob; dev = devs_sorted.devs[i]; rcu_read_lock(); ca = rcu_dereference(c->devs[dev]); if (ca) percpu_ref_get(&ca->ref); rcu_read_unlock(); if (!ca) continue; if (!ca->mi.durability && *have_cache) { percpu_ref_put(&ca->ref); continue; } ob = bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage); if (!IS_ERR(ob)) bch2_dev_stripe_increment_inlined(ca, stripe, &usage); percpu_ref_put(&ca->ref); if (IS_ERR(ob)) { ret = PTR_ERR(ob); if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) break; continue; } ob->data_type = data_type; if (add_new_bucket(c, ptrs, devs_may_alloc, nr_replicas, nr_effective, have_cache, flags, ob)) { ret = 0; break; } } return ret; } /* Allocate from stripes: */ /* * if we can't allocate a new stripe because there are already too many * partially filled stripes, force allocating from an existing stripe even when * it's to a device we don't want: */ static int bucket_alloc_from_stripe(struct btree_trans *trans, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_mask *devs_may_alloc, u16 target, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, enum bch_watermark watermark, unsigned flags, struct closure *cl) { struct bch_fs *c = trans->c; struct dev_alloc_list devs_sorted; struct ec_stripe_head *h; struct open_bucket *ob; unsigned i, ec_idx; int ret = 0; if (nr_replicas < 2) return 0; if (ec_open_bucket(c, ptrs)) return 0; h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); if (IS_ERR(h)) return PTR_ERR(h); if (!h) return 0; devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); for (i = 0; i < devs_sorted.nr; i++) for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { if (!h->s->blocks[ec_idx]) continue; ob = c->open_buckets + h->s->blocks[ec_idx]; if (ob->dev == devs_sorted.devs[i] && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) goto got_bucket; } goto out_put_head; got_bucket: 
ob->ec_idx = ec_idx; ob->ec = h->s; ec_stripe_new_get(h->s, STRIPE_REF_io); ret = add_new_bucket(c, ptrs, devs_may_alloc, nr_replicas, nr_effective, have_cache, flags, ob); out_put_head: bch2_ec_stripe_head_put(c, h); return ret; } /* Sector allocator */ static bool want_bucket(struct bch_fs *c, struct write_point *wp, struct bch_devs_mask *devs_may_alloc, bool *have_cache, bool ec, struct open_bucket *ob) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); if (!test_bit(ob->dev, devs_may_alloc->d)) return false; if (ob->data_type != wp->data_type) return false; if (!ca->mi.durability && (wp->data_type == BCH_DATA_btree || ec || *have_cache)) return false; if (ec != (ob->ec != NULL)) return false; return true; } static int bucket_alloc_set_writepoint(struct bch_fs *c, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_mask *devs_may_alloc, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, bool ec, unsigned flags) { struct open_buckets ptrs_skip = { .nr = 0 }; struct open_bucket *ob; unsigned i; int ret = 0; open_bucket_for_each(c, &wp->ptrs, ob, i) { if (!ret && want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) ret = add_new_bucket(c, ptrs, devs_may_alloc, nr_replicas, nr_effective, have_cache, flags, ob); else ob_push(c, &ptrs_skip, ob); } wp->ptrs = ptrs_skip; return ret; } static int bucket_alloc_set_partial(struct bch_fs *c, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_mask *devs_may_alloc, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, bool ec, enum bch_watermark watermark, unsigned flags) { int i, ret = 0; if (!c->open_buckets_partial_nr) return 0; spin_lock(&c->freelist_lock); if (!c->open_buckets_partial_nr) goto unlock; for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) { struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i]; if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); struct bch_dev_usage usage; u64 avail; bch2_dev_usage_read_fast(ca, &usage); avail = dev_buckets_free(ca, usage, watermark); if (!avail) continue; array_remove_item(c->open_buckets_partial, c->open_buckets_partial_nr, i); ob->on_partial_list = false; ret = add_new_bucket(c, ptrs, devs_may_alloc, nr_replicas, nr_effective, have_cache, flags, ob); if (ret) break; } } unlock: spin_unlock(&c->freelist_lock); return ret; } static int __open_bucket_add_buckets(struct btree_trans *trans, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_list *devs_have, u16 target, bool erasure_code, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, enum bch_watermark watermark, unsigned flags, struct closure *_cl) { struct bch_fs *c = trans->c; struct bch_devs_mask devs; struct open_bucket *ob; struct closure *cl = NULL; unsigned i; int ret; devs = target_rw_devs(c, wp->data_type, target); /* Don't allocate from devices we already have pointers to: */ darray_for_each(*devs_have, i) __clear_bit(*i, devs.d); open_bucket_for_each(c, ptrs, ob, i) __clear_bit(ob->dev, devs.d); if (erasure_code && ec_open_bucket(c, ptrs)) return 0; ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs, nr_replicas, nr_effective, have_cache, erasure_code, flags); if (ret) return ret; ret = bucket_alloc_set_partial(c, ptrs, wp, &devs, nr_replicas, nr_effective, have_cache, erasure_code, watermark, flags); if (ret) return ret; if (erasure_code) { ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs, target, nr_replicas, nr_effective, have_cache, watermark, flags, 
_cl); } else { retry_blocking: /* * Try nonblocking first, so that if one device is full we'll try from * other devices: */ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs, nr_replicas, nr_effective, have_cache, flags, wp->data_type, watermark, cl); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && !cl && _cl) { cl = _cl; goto retry_blocking; } } return ret; } static int open_bucket_add_buckets(struct btree_trans *trans, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_list *devs_have, u16 target, unsigned erasure_code, unsigned nr_replicas, unsigned *nr_effective, bool *have_cache, enum bch_watermark watermark, unsigned flags, struct closure *cl) { int ret; if (erasure_code) { ret = __open_bucket_add_buckets(trans, ptrs, wp, devs_have, target, erasure_code, nr_replicas, nr_effective, have_cache, watermark, flags, cl); if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || bch2_err_matches(ret, BCH_ERR_operation_blocked) || bch2_err_matches(ret, BCH_ERR_freelist_empty) || bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) return ret; if (*nr_effective >= nr_replicas) return 0; } ret = __open_bucket_add_buckets(trans, ptrs, wp, devs_have, target, false, nr_replicas, nr_effective, have_cache, watermark, flags, cl); return ret < 0 ? ret : 0; } /** * should_drop_bucket - check if this is open_bucket should go away * @ob: open_bucket to predicate on * @c: filesystem handle * @ca: if set, we're killing buckets for a particular device * @ec: if true, we're shutting down erasure coding and killing all ec * open_buckets * otherwise, return true * Returns: true if we should kill this open_bucket * * We're killing open_buckets because we're shutting down a device, erasure * coding, or the entire filesystem - check if this open_bucket matches: */ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, struct bch_dev *ca, bool ec) { if (ec) { return ob->ec != NULL; } else if (ca) { bool drop = ob->dev == ca->dev_idx; struct open_bucket *ob2; unsigned i; if (!drop && ob->ec) { unsigned nr_blocks; mutex_lock(&ob->ec->lock); nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; for (i = 0; i < nr_blocks; i++) { if (!ob->ec->blocks[i]) continue; ob2 = c->open_buckets + ob->ec->blocks[i]; drop |= ob2->dev == ca->dev_idx; } mutex_unlock(&ob->ec->lock); } return drop; } else { return true; } } static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, bool ec, struct write_point *wp) { struct open_buckets ptrs = { .nr = 0 }; struct open_bucket *ob; unsigned i; mutex_lock(&wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) if (should_drop_bucket(ob, c, ca, ec)) bch2_open_bucket_put(c, ob); else ob_push(c, &ptrs, ob); wp->ptrs = ptrs; mutex_unlock(&wp->lock); } void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, bool ec) { unsigned i; /* Next, close write points that point to this device... 
*/ for (i = 0; i < ARRAY_SIZE(c->write_points); i++) bch2_writepoint_stop(c, ca, ec, &c->write_points[i]); bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point); bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); mutex_lock(&c->btree_reserve_cache_lock); while (c->btree_reserve_cache_nr) { struct btree_alloc *a = &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; bch2_open_buckets_put(c, &a->ob); } mutex_unlock(&c->btree_reserve_cache_lock); spin_lock(&c->freelist_lock); i = 0; while (i < c->open_buckets_partial_nr) { struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i]; if (should_drop_bucket(ob, c, ca, ec)) { --c->open_buckets_partial_nr; swap(c->open_buckets_partial[i], c->open_buckets_partial[c->open_buckets_partial_nr]); ob->on_partial_list = false; spin_unlock(&c->freelist_lock); bch2_open_bucket_put(c, ob); spin_lock(&c->freelist_lock); } else { i++; } } spin_unlock(&c->freelist_lock); bch2_ec_stop_dev(c, ca); } static inline struct hlist_head *writepoint_hash(struct bch_fs *c, unsigned long write_point) { unsigned hash = hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash))); return &c->write_points_hash[hash]; } static struct write_point *__writepoint_find(struct hlist_head *head, unsigned long write_point) { struct write_point *wp; rcu_read_lock(); hlist_for_each_entry_rcu(wp, head, node) if (wp->write_point == write_point) goto out; wp = NULL; out: rcu_read_unlock(); return wp; } static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) { u64 stranded = c->write_points_nr * c->bucket_size_max; u64 free = bch2_fs_usage_read_short(c).free; return stranded * factor > free; } static bool try_increase_writepoints(struct bch_fs *c) { struct write_point *wp; if (c->write_points_nr == ARRAY_SIZE(c->write_points) || too_many_writepoints(c, 32)) return false; wp = c->write_points + c->write_points_nr++; hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point)); return true; } static bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr) { struct bch_fs *c = trans->c; struct write_point *wp; struct open_bucket *ob; unsigned i; mutex_lock(&c->write_points_hash_lock); if (c->write_points_nr < old_nr) { mutex_unlock(&c->write_points_hash_lock); return true; } if (c->write_points_nr == 1 || !too_many_writepoints(c, 8)) { mutex_unlock(&c->write_points_hash_lock); return false; } wp = c->write_points + --c->write_points_nr; hlist_del_rcu(&wp->node); mutex_unlock(&c->write_points_hash_lock); bch2_trans_mutex_lock_norelock(trans, &wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) open_bucket_free_unused(c, ob); wp->ptrs.nr = 0; mutex_unlock(&wp->lock); return true; } static struct write_point *writepoint_find(struct btree_trans *trans, unsigned long write_point) { struct bch_fs *c = trans->c; struct write_point *wp, *oldest; struct hlist_head *head; if (!(write_point & 1UL)) { wp = (struct write_point *) write_point; bch2_trans_mutex_lock_norelock(trans, &wp->lock); return wp; } head = writepoint_hash(c, write_point); restart_find: wp = __writepoint_find(head, write_point); if (wp) { lock_wp: bch2_trans_mutex_lock_norelock(trans, &wp->lock); if (wp->write_point == write_point) goto out; mutex_unlock(&wp->lock); goto restart_find; } restart_find_oldest: oldest = NULL; for (wp = c->write_points; wp < c->write_points + c->write_points_nr; wp++) if (!oldest || time_before64(wp->last_used, oldest->last_used)) oldest = wp; bch2_trans_mutex_lock_norelock(trans, 
&oldest->lock); bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock); if (oldest >= c->write_points + c->write_points_nr || try_increase_writepoints(c)) { mutex_unlock(&c->write_points_hash_lock); mutex_unlock(&oldest->lock); goto restart_find_oldest; } wp = __writepoint_find(head, write_point); if (wp && wp != oldest) { mutex_unlock(&c->write_points_hash_lock); mutex_unlock(&oldest->lock); goto lock_wp; } wp = oldest; hlist_del_rcu(&wp->node); wp->write_point = write_point; hlist_add_head_rcu(&wp->node, head); mutex_unlock(&c->write_points_hash_lock); out: wp->last_used = local_clock(); return wp; } static noinline void deallocate_extra_replicas(struct bch_fs *c, struct open_buckets *ptrs, struct open_buckets *ptrs_no_use, unsigned extra_replicas) { struct open_buckets ptrs2 = { 0 }; struct open_bucket *ob; unsigned i; open_bucket_for_each(c, ptrs, ob, i) { unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability; if (d && d <= extra_replicas) { extra_replicas -= d; ob_push(c, ptrs_no_use, ob); } else { ob_push(c, &ptrs2, ob); } } *ptrs = ptrs2; } /* * Get us an open_bucket we can allocate from, return with it locked: */ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, unsigned target, unsigned erasure_code, struct write_point_specifier write_point, struct bch_devs_list *devs_have, unsigned nr_replicas, unsigned nr_replicas_required, enum bch_watermark watermark, unsigned flags, struct closure *cl, struct write_point **wp_ret) { struct bch_fs *c = trans->c; struct write_point *wp; struct open_bucket *ob; struct open_buckets ptrs; unsigned nr_effective, write_points_nr; bool have_cache; int ret; int i; if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) erasure_code = false; BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); BUG_ON(!nr_replicas || !nr_replicas_required); retry: ptrs.nr = 0; nr_effective = 0; write_points_nr = c->write_points_nr; have_cache = false; *wp_ret = wp = writepoint_find(trans, write_point.v); /* metadata may not allocate on cache devices: */ if (wp->data_type != BCH_DATA_user) have_cache = true; if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, watermark, flags, NULL); if (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto alloc_done; /* Don't retry from all devices if we're out of open buckets: */ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, watermark, flags, cl); if (!ret2 || bch2_err_matches(ret2, BCH_ERR_transaction_restart) || bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) { ret = ret2; goto alloc_done; } } /* * Only try to allocate cache (durability = 0 devices) from the * specified target: */ have_cache = true; ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, 0, erasure_code, nr_replicas, &nr_effective, &have_cache, watermark, flags, cl); } else { ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, watermark, flags, cl); } alloc_done: BUG_ON(!ret && nr_effective < nr_replicas); if (erasure_code && !ec_open_bucket(c, &ptrs)) pr_debug("failed to get ec bucket: ret %u", ret); if (ret == -BCH_ERR_insufficient_devices && nr_effective >= nr_replicas_required) ret = 0; if (ret) goto err; if (nr_effective > nr_replicas) deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - 
nr_replicas); /* Free buckets we didn't use: */ open_bucket_for_each(c, &wp->ptrs, ob, i) open_bucket_free_unused(c, ob); wp->ptrs = ptrs; wp->sectors_free = UINT_MAX; open_bucket_for_each(c, &wp->ptrs, ob, i) wp->sectors_free = min(wp->sectors_free, ob->sectors_free); BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); return 0; err: open_bucket_for_each(c, &wp->ptrs, ob, i) if (ptrs.nr < ARRAY_SIZE(ptrs.v)) ob_push(c, &ptrs, ob); else open_bucket_free_unused(c, ob); wp->ptrs = ptrs; mutex_unlock(&wp->lock); if (bch2_err_matches(ret, BCH_ERR_freelist_empty) && try_decrease_writepoints(trans, write_points_nr)) goto retry; if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) || bch2_err_matches(ret, BCH_ERR_freelist_empty)) return cl ? -BCH_ERR_bucket_alloc_blocked : -BCH_ERR_ENOSPC_bucket_alloc; return ret; } struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); return (struct bch_extent_ptr) { .type = 1 << BCH_EXTENT_ENTRY_ptr, .gen = ob->gen, .dev = ob->dev, .offset = bucket_to_sector(ca, ob->bucket) + ca->mi.bucket_size - ob->sectors_free, }; } void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bkey_i *k, unsigned sectors, bool cached) { bch2_alloc_sectors_append_ptrs_inlined(c, wp, k, sectors, cached); } /* * Append pointers to the space we just allocated to @k, and mark @sectors space * as allocated out of @ob */ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp) { bch2_alloc_sectors_done_inlined(c, wp); } static inline void writepoint_init(struct write_point *wp, enum bch_data_type type) { mutex_init(&wp->lock); wp->data_type = type; INIT_WORK(&wp->index_update_work, bch2_write_point_do_index_updates); INIT_LIST_HEAD(&wp->writes); spin_lock_init(&wp->writes_lock); } void bch2_fs_allocator_foreground_init(struct bch_fs *c) { struct open_bucket *ob; struct write_point *wp; mutex_init(&c->write_points_hash_lock); c->write_points_nr = ARRAY_SIZE(c->write_points); /* open bucket 0 is a sentinal NULL: */ spin_lock_init(&c->open_buckets[0].lock); for (ob = c->open_buckets + 1; ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock_init(&ob->lock); c->open_buckets_nr_free++; ob->freelist = c->open_buckets_freelist; c->open_buckets_freelist = ob - c->open_buckets; } writepoint_init(&c->btree_write_point, BCH_DATA_btree); writepoint_init(&c->rebalance_write_point, BCH_DATA_user); writepoint_init(&c->copygc_write_point, BCH_DATA_user); for (wp = c->write_points; wp < c->write_points + c->write_points_nr; wp++) { writepoint_init(wp, BCH_DATA_user); wp->last_used = local_clock(); wp->write_point = (unsigned long) wp; hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point)); } } static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); unsigned data_type = ob->data_type; barrier(); /* READ_ONCE() doesn't work on bitfields */ prt_printf(out, "%zu ref %u ", ob - c->open_buckets, atomic_read(&ob->pin)); bch2_prt_data_type(out, data_type); prt_printf(out, " %u:%llu gen %u allocated %u/%u", ob->dev, ob->bucket, ob->gen, ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size); if (ob->ec) prt_printf(out, " ec idx %llu", ob->ec->idx); if (ob->on_partial_list) prt_str(out, " partial"); prt_newline(out); } void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) { struct open_bucket *ob; out->atomic++; for (ob = c->open_buckets; 
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); if (ob->valid && !ob->on_partial_list) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } --out->atomic; } void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c) { unsigned i; out->atomic++; spin_lock(&c->freelist_lock); for (i = 0; i < c->open_buckets_partial_nr; i++) bch2_open_bucket_to_text(out, c, c->open_buckets + c->open_buckets_partial[i]); spin_unlock(&c->freelist_lock); --out->atomic; } static const char * const bch2_write_point_states[] = { #define x(n) #n, WRITE_POINT_STATES() #undef x NULL }; static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, struct write_point *wp) { struct open_bucket *ob; unsigned i; prt_printf(out, "%lu: ", wp->write_point); prt_human_readable_u64(out, wp->sectors_allocated); prt_printf(out, " last wrote: "); bch2_pr_time_units(out, sched_clock() - wp->last_used); for (i = 0; i < WRITE_POINT_STATE_NR; i++) { prt_printf(out, " %s: ", bch2_write_point_states[i]); bch2_pr_time_units(out, wp->time[i]); } prt_newline(out); printbuf_indent_add(out, 2); open_bucket_for_each(c, &wp->ptrs, ob, i) bch2_open_bucket_to_text(out, c, ob); printbuf_indent_sub(out, 2); } void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) { struct write_point *wp; prt_str(out, "Foreground write points\n"); for (wp = c->write_points; wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) bch2_write_point_to_text(out, c, wp); prt_str(out, "Copygc write point\n"); bch2_write_point_to_text(out, c, &c->copygc_write_point); prt_str(out, "Rebalance write point\n"); bch2_write_point_to_text(out, c, &c->rebalance_write_point); prt_str(out, "Btree write point\n"); bch2_write_point_to_text(out, c, &c->btree_write_point); }
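/*
 * Illustrative sketch, not part of the kernel sources above: a minimal
 * user-space model of the device-stripe weighting that
 * bch2_dev_alloc_list() and bch2_dev_stripe_increment_inlined() implement.
 * Each device carries a next_alloc counter; candidate devices are tried in
 * increasing counter order, and after an allocation the chosen device's
 * counter is bumped by roughly 2^48 / free_space, so emptier devices are
 * preferred without starving the others.  Every name below (toy_dev,
 * toy_stripe_pick, toy_stripe_increment, NR_TOY_DEVS, the free-space
 * numbers) is hypothetical and exists only for this sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_TOY_DEVS	3

struct toy_dev {
	uint64_t	free_buckets;	/* free space, in buckets */
	uint64_t	next_alloc;	/* stripe counter: smaller == preferred */
};

/* pick the device with the smallest next_alloc counter */
static unsigned toy_stripe_pick(struct toy_dev *devs, unsigned nr)
{
	unsigned i, best = 0;

	for (i = 1; i < nr; i++)
		if (devs[i].next_alloc < devs[best].next_alloc)
			best = i;
	return best;
}

/* bump the chosen device, then rescale every counter to avoid overflow */
static void toy_stripe_increment(struct toy_dev *devs, unsigned nr, unsigned idx)
{
	uint64_t inv = devs[idx].free_buckets
		? (1ULL << 48) / devs[idx].free_buckets
		: 1ULL << 48;
	uint64_t scale = devs[idx].next_alloc / 4;
	unsigned i;

	if (devs[idx].next_alloc + inv >= devs[idx].next_alloc)
		devs[idx].next_alloc += inv;
	else
		devs[idx].next_alloc = UINT64_MAX;

	for (i = 0; i < nr; i++)
		devs[i].next_alloc = devs[i].next_alloc < scale
			? 0 : devs[i].next_alloc - scale;
}

int main(void)
{
	/* one big, one medium, one nearly full device */
	struct toy_dev devs[NR_TOY_DEVS] = {
		{ .free_buckets = 1000 },
		{ .free_buckets = 100 },
		{ .free_buckets = 10 },
	};
	unsigned counts[NR_TOY_DEVS] = { 0 };
	int n;

	for (n = 0; n < 1000; n++) {
		unsigned idx = toy_stripe_pick(devs, NR_TOY_DEVS);

		counts[idx]++;
		if (devs[idx].free_buckets)
			devs[idx].free_buckets--;	/* pretend we consumed a bucket */
		toy_stripe_increment(devs, NR_TOY_DEVS, idx);
	}

	/* allocation counts come out roughly proportional to free space */
	for (n = 0; n < NR_TOY_DEVS; n++)
		printf("dev %d: %u allocations\n", n, counts[n]);
	return 0;
}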
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 HUAWEI * Author: Cai Zhiyong <caizhiyong@huawei.com> * * Read block device partition table from the command line. * Typically used for fixed block (eMMC) embedded devices. * It has no MBR, so saves storage space. Bootloader can be easily accessed * by absolute address of data on the block device. * Users can easily change the partition. * * The format for the command line is just like mtdparts.
* * For further information, see "Documentation/block/cmdline-partition.rst" * */ #include <linux/blkdev.h> #include <linux/fs.h> #include <linux/slab.h> #include "check.h" /* partition flags */ #define PF_RDONLY 0x01 /* Device is read only */ #define PF_POWERUP_LOCK 0x02 /* Always locked after reset */ struct cmdline_subpart { char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */ sector_t from; sector_t size; int flags; struct cmdline_subpart *next_subpart; }; struct cmdline_parts { char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */ unsigned int nr_subparts; struct cmdline_subpart *subpart; struct cmdline_parts *next_parts; }; static int parse_subpart(struct cmdline_subpart **subpart, char *partdef) { int ret = 0; struct cmdline_subpart *new_subpart; *subpart = NULL; new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL); if (!new_subpart) return -ENOMEM; if (*partdef == '-') { new_subpart->size = (sector_t)(~0ULL); partdef++; } else { new_subpart->size = (sector_t)memparse(partdef, &partdef); if (new_subpart->size < (sector_t)PAGE_SIZE) { pr_warn("cmdline partition size is invalid."); ret = -EINVAL; goto fail; } } if (*partdef == '@') { partdef++; new_subpart->from = (sector_t)memparse(partdef, &partdef); } else { new_subpart->from = (sector_t)(~0ULL); } if (*partdef == '(') { int length; char *next = strchr(++partdef, ')'); if (!next) { pr_warn("cmdline partition format is invalid."); ret = -EINVAL; goto fail; } length = min_t(int, next - partdef, sizeof(new_subpart->name) - 1); strscpy(new_subpart->name, partdef, length); partdef = ++next; } else new_subpart->name[0] = '\0'; new_subpart->flags = 0; if (!strncmp(partdef, "ro", 2)) { new_subpart->flags |= PF_RDONLY; partdef += 2; } if (!strncmp(partdef, "lk", 2)) { new_subpart->flags |= PF_POWERUP_LOCK; partdef += 2; } *subpart = new_subpart; return 0; fail: kfree(new_subpart); return ret; } static void free_subpart(struct cmdline_parts *parts) { struct cmdline_subpart *subpart; while (parts->subpart) { subpart = parts->subpart; parts->subpart = subpart->next_subpart; kfree(subpart); } } static int parse_parts(struct cmdline_parts **parts, const char *bdevdef) { int ret = -EINVAL; char *next; int length; struct cmdline_subpart **next_subpart; struct cmdline_parts *newparts; char buf[BDEVNAME_SIZE + 32 + 4]; *parts = NULL; newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL); if (!newparts) return -ENOMEM; next = strchr(bdevdef, ':'); if (!next) { pr_warn("cmdline partition has no block device."); goto fail; } length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1); strscpy(newparts->name, bdevdef, length); newparts->nr_subparts = 0; next_subpart = &newparts->subpart; while (next && *(++next)) { bdevdef = next; next = strchr(bdevdef, ','); length = (!next) ? 
(sizeof(buf) - 1) : min_t(int, next - bdevdef, sizeof(buf) - 1); strscpy(buf, bdevdef, length); ret = parse_subpart(next_subpart, buf); if (ret) goto fail; newparts->nr_subparts++; next_subpart = &(*next_subpart)->next_subpart; } if (!newparts->subpart) { pr_warn("cmdline partition has no valid partition."); ret = -EINVAL; goto fail; } *parts = newparts; return 0; fail: free_subpart(newparts); kfree(newparts); return ret; } static void cmdline_parts_free(struct cmdline_parts **parts) { struct cmdline_parts *next_parts; while (*parts) { next_parts = (*parts)->next_parts; free_subpart(*parts); kfree(*parts); *parts = next_parts; } } static int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline) { int ret; char *buf; char *pbuf; char *next; struct cmdline_parts **next_parts; *parts = NULL; next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL); if (!buf) return -ENOMEM; next_parts = parts; while (next && *pbuf) { next = strchr(pbuf, ';'); if (next) *next = '\0'; ret = parse_parts(next_parts, pbuf); if (ret) goto fail; if (next) pbuf = ++next; next_parts = &(*next_parts)->next_parts; } if (!*parts) { pr_warn("cmdline partition has no valid partition."); ret = -EINVAL; goto fail; } ret = 0; done: kfree(buf); return ret; fail: cmdline_parts_free(parts); goto done; } static struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, const char *bdev) { while (parts && strncmp(bdev, parts->name, sizeof(parts->name))) parts = parts->next_parts; return parts; } static char *cmdline; static struct cmdline_parts *bdev_parts; static int add_part(int slot, struct cmdline_subpart *subpart, struct parsed_partitions *state) { int label_min; struct partition_meta_info *info; char tmp[sizeof(info->volname) + 4]; if (slot >= state->limit) return 1; put_partition(state, slot, subpart->from >> 9, subpart->size >> 9); info = &state->parts[slot].info; label_min = min_t(int, sizeof(info->volname) - 1, sizeof(subpart->name)); strscpy(info->volname, subpart->name, label_min); snprintf(tmp, sizeof(tmp), "(%s)", info->volname); strlcat(state->pp_buf, tmp, PAGE_SIZE); state->parts[slot].has_info = true; return 0; } static int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, struct parsed_partitions *state) { sector_t from = 0; struct cmdline_subpart *subpart; int slot = 1; for (subpart = parts->subpart; subpart; subpart = subpart->next_subpart, slot++) { if (subpart->from == (sector_t)(~0ULL)) subpart->from = from; else from = subpart->from; if (from >= disk_size) break; if (subpart->size > (disk_size - from)) subpart->size = disk_size - from; from += subpart->size; if (add_part(slot, subpart, state)) break; } return slot; } static int __init cmdline_parts_setup(char *s) { cmdline = s; return 1; } __setup("blkdevparts=", cmdline_parts_setup); static bool has_overlaps(sector_t from, sector_t size, sector_t from2, sector_t size2) { sector_t end = from + size; sector_t end2 = from2 + size2; if (from >= from2 && from < end2) return true; if (end > from2 && end <= end2) return true; if (from2 >= from && from2 < end) return true; if (end2 > from && end2 <= end) return true; return false; } static inline void overlaps_warns_header(void) { pr_warn("Overlapping partitions are used in command line partitions."); pr_warn("Don't use filesystems on overlapping partitions:"); } static void cmdline_parts_verifier(int slot, struct parsed_partitions *state) { int i; bool header = true; for (; slot < state->limit && state->parts[slot].has_info; slot++) { for (i = slot+1; i < state->limit && 
state->parts[i].has_info; i++) { if (has_overlaps(state->parts[slot].from, state->parts[slot].size, state->parts[i].from, state->parts[i].size)) { if (header) { header = false; overlaps_warns_header(); } pr_warn("%s[%llu,%llu] overlaps with " "%s[%llu,%llu].", state->parts[slot].info.volname, (u64)state->parts[slot].from << 9, (u64)state->parts[slot].size << 9, state->parts[i].info.volname, (u64)state->parts[i].from << 9, (u64)state->parts[i].size << 9); } } } } /* * Purpose: allocate cmdline partitions. * Returns: * -1 if unable to read the partition table * 0 if this isn't our partition table * 1 if successful */ int cmdline_partition(struct parsed_partitions *state) { sector_t disk_size; struct cmdline_parts *parts; if (cmdline) { if (bdev_parts) cmdline_parts_free(&bdev_parts); if (cmdline_parts_parse(&bdev_parts, cmdline)) { cmdline = NULL; return -1; } cmdline = NULL; } if (!bdev_parts) return 0; parts = cmdline_parts_find(bdev_parts, state->disk->disk_name); if (!parts) return 0; disk_size = get_capacity(state->disk) << 9; cmdline_parts_set(parts, disk_size, state); cmdline_parts_verifier(1, state); strlcat(state->pp_buf, "\n", PAGE_SIZE); return 1; }
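/*
 * Illustrative sketch, not part of the kernel file above: a tiny user-space
 * walk over the same "blkdevparts=" syntax that cmdline_parts_parse() and
 * parse_parts() consume, documented in
 * Documentation/block/cmdline-partition.rst:
 *
 *   blkdevparts=<bdev>:<size>[@<from>](name)[,...][;<bdev>:...]
 *
 * In the real parser sizes go through memparse() (k/m/g suffixes) and "-"
 * means "the rest of the device"; here we only split the string to show the
 * ';' / ':' / ',' structure.  The example command line below is hypothetical.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char cmdline[] = "mmcblk0:1m(boot),512k(env),-(rootfs);mmcblk1:-(data)";
	char *saveptr1, *saveptr2;
	char *bdevdef = strtok_r(cmdline, ";", &saveptr1);

	/* each ';'-separated chunk describes one block device */
	while (bdevdef) {
		char *colon = strchr(bdevdef, ':');
		char *partdef;

		if (!colon) {
			fprintf(stderr, "no block device in \"%s\"\n", bdevdef);
			return 1;
		}
		*colon = '\0';
		printf("device %s:\n", bdevdef);

		/* each ','-separated entry is one subpartition definition */
		partdef = strtok_r(colon + 1, ",", &saveptr2);
		while (partdef) {
			printf("  subpart \"%s\"\n", partdef);
			partdef = strtok_r(NULL, ",", &saveptr2);
		}

		bdevdef = strtok_r(NULL, ";", &saveptr1);
	}
	return 0;
}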
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM scsi #if !defined(_TRACE_SCSI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCSI_H #include <scsi/scsi_cmnd.h> #include <scsi/scsi_host.h> #include <linux/tracepoint.h> #include <linux/trace_seq.h> #define scsi_opcode_name(opcode) { opcode, #opcode } #define show_opcode_name(val) \ __print_symbolic(val, \ scsi_opcode_name(TEST_UNIT_READY), \ scsi_opcode_name(REZERO_UNIT), \ scsi_opcode_name(REQUEST_SENSE), \ scsi_opcode_name(FORMAT_UNIT), \ scsi_opcode_name(READ_BLOCK_LIMITS), \ scsi_opcode_name(REASSIGN_BLOCKS), \ scsi_opcode_name(INITIALIZE_ELEMENT_STATUS), \ scsi_opcode_name(READ_6), \ scsi_opcode_name(WRITE_6), \ scsi_opcode_name(SEEK_6), \ scsi_opcode_name(READ_REVERSE), \ scsi_opcode_name(WRITE_FILEMARKS), \ scsi_opcode_name(SPACE), \ scsi_opcode_name(INQUIRY), \ scsi_opcode_name(RECOVER_BUFFERED_DATA), \ scsi_opcode_name(MODE_SELECT), \ scsi_opcode_name(RESERVE), \ scsi_opcode_name(RELEASE), \ scsi_opcode_name(COPY), \ scsi_opcode_name(ERASE), \ scsi_opcode_name(MODE_SENSE), \ scsi_opcode_name(START_STOP), \ scsi_opcode_name(RECEIVE_DIAGNOSTIC), \ scsi_opcode_name(SEND_DIAGNOSTIC), \ scsi_opcode_name(ALLOW_MEDIUM_REMOVAL), \ scsi_opcode_name(SET_WINDOW), \ scsi_opcode_name(READ_CAPACITY), \ scsi_opcode_name(READ_10), \ scsi_opcode_name(WRITE_10), \ scsi_opcode_name(SEEK_10), \ scsi_opcode_name(POSITION_TO_ELEMENT), \ scsi_opcode_name(WRITE_VERIFY), \ scsi_opcode_name(VERIFY), \ scsi_opcode_name(SEARCH_HIGH), \ scsi_opcode_name(SEARCH_EQUAL), \ scsi_opcode_name(SEARCH_LOW), \ scsi_opcode_name(SET_LIMITS), \ scsi_opcode_name(PRE_FETCH), \ scsi_opcode_name(READ_POSITION), \ scsi_opcode_name(SYNCHRONIZE_CACHE), \ scsi_opcode_name(LOCK_UNLOCK_CACHE), \ scsi_opcode_name(READ_DEFECT_DATA), \ scsi_opcode_name(MEDIUM_SCAN), \ scsi_opcode_name(COMPARE), \ scsi_opcode_name(COPY_VERIFY), \ scsi_opcode_name(WRITE_BUFFER), \ scsi_opcode_name(READ_BUFFER), \ scsi_opcode_name(UPDATE_BLOCK), \ scsi_opcode_name(READ_LONG), \ scsi_opcode_name(WRITE_LONG), \ scsi_opcode_name(CHANGE_DEFINITION), \ scsi_opcode_name(WRITE_SAME), \ scsi_opcode_name(UNMAP), \ scsi_opcode_name(READ_TOC), \ scsi_opcode_name(LOG_SELECT), \
scsi_opcode_name(LOG_SENSE), \ scsi_opcode_name(XDWRITEREAD_10), \ scsi_opcode_name(MODE_SELECT_10), \ scsi_opcode_name(RESERVE_10), \ scsi_opcode_name(RELEASE_10), \ scsi_opcode_name(MODE_SENSE_10), \ scsi_opcode_name(PERSISTENT_RESERVE_IN), \ scsi_opcode_name(PERSISTENT_RESERVE_OUT), \ scsi_opcode_name(VARIABLE_LENGTH_CMD), \ scsi_opcode_name(REPORT_LUNS), \ scsi_opcode_name(MAINTENANCE_IN), \ scsi_opcode_name(MAINTENANCE_OUT), \ scsi_opcode_name(MOVE_MEDIUM), \ scsi_opcode_name(EXCHANGE_MEDIUM), \ scsi_opcode_name(READ_12), \ scsi_opcode_name(WRITE_12), \ scsi_opcode_name(WRITE_VERIFY_12), \ scsi_opcode_name(SEARCH_HIGH_12), \ scsi_opcode_name(SEARCH_EQUAL_12), \ scsi_opcode_name(SEARCH_LOW_12), \ scsi_opcode_name(READ_ELEMENT_STATUS), \ scsi_opcode_name(SEND_VOLUME_TAG), \ scsi_opcode_name(WRITE_LONG_2), \ scsi_opcode_name(READ_16), \ scsi_opcode_name(WRITE_16), \ scsi_opcode_name(VERIFY_16), \ scsi_opcode_name(WRITE_SAME_16), \ scsi_opcode_name(ZBC_OUT), \ scsi_opcode_name(ZBC_IN), \ scsi_opcode_name(SERVICE_ACTION_IN_16), \ scsi_opcode_name(READ_32), \ scsi_opcode_name(WRITE_32), \ scsi_opcode_name(WRITE_SAME_32), \ scsi_opcode_name(ATA_16), \ scsi_opcode_name(ATA_12)) #define scsi_hostbyte_name(result) { result, #result } #define show_hostbyte_name(val) \ __print_symbolic(val, \ scsi_hostbyte_name(DID_OK), \ scsi_hostbyte_name(DID_NO_CONNECT), \ scsi_hostbyte_name(DID_BUS_BUSY), \ scsi_hostbyte_name(DID_TIME_OUT), \ scsi_hostbyte_name(DID_BAD_TARGET), \ scsi_hostbyte_name(DID_ABORT), \ scsi_hostbyte_name(DID_PARITY), \ scsi_hostbyte_name(DID_ERROR), \ scsi_hostbyte_name(DID_RESET), \ scsi_hostbyte_name(DID_BAD_INTR), \ scsi_hostbyte_name(DID_PASSTHROUGH), \ scsi_hostbyte_name(DID_SOFT_ERROR), \ scsi_hostbyte_name(DID_IMM_RETRY), \ scsi_hostbyte_name(DID_REQUEUE), \ scsi_hostbyte_name(DID_TRANSPORT_DISRUPTED), \ scsi_hostbyte_name(DID_TRANSPORT_FAILFAST)) #define scsi_statusbyte_name(result) { result, #result } #define show_statusbyte_name(val) \ __print_symbolic(val, \ scsi_statusbyte_name(SAM_STAT_GOOD), \ scsi_statusbyte_name(SAM_STAT_CHECK_CONDITION), \ scsi_statusbyte_name(SAM_STAT_CONDITION_MET), \ scsi_statusbyte_name(SAM_STAT_BUSY), \ scsi_statusbyte_name(SAM_STAT_INTERMEDIATE), \ scsi_statusbyte_name(SAM_STAT_INTERMEDIATE_CONDITION_MET), \ scsi_statusbyte_name(SAM_STAT_RESERVATION_CONFLICT), \ scsi_statusbyte_name(SAM_STAT_COMMAND_TERMINATED), \ scsi_statusbyte_name(SAM_STAT_TASK_SET_FULL), \ scsi_statusbyte_name(SAM_STAT_ACA_ACTIVE), \ scsi_statusbyte_name(SAM_STAT_TASK_ABORTED)) #define scsi_prot_op_name(result) { result, #result } #define show_prot_op_name(val) \ __print_symbolic(val, \ scsi_prot_op_name(SCSI_PROT_NORMAL),