/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
 * linux/can/skb.h
 *
 * Definitions for the CAN network socket buffer
 *
 * Copyright (C) 2012 Oliver Hartkopp <socketcan@hartkopp.net>
 *
 */

#ifndef _CAN_SKB_H
#define _CAN_SKB_H

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/can.h>
#include <net/sock.h>

void can_flush_echo_skb(struct net_device *dev);
int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
		     unsigned int idx, unsigned int frame_len);
struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
				   unsigned int *len_ptr,
				   unsigned int *frame_len_ptr);
unsigned int __must_check can_get_echo_skb(struct net_device *dev,
					   unsigned int idx,
					   unsigned int *frame_len_ptr);
void can_free_echo_skb(struct net_device *dev, unsigned int idx,
		       unsigned int *frame_len_ptr);
struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
struct sk_buff *alloc_canfd_skb(struct net_device *dev,
				struct canfd_frame **cfd);
struct sk_buff *alloc_canxl_skb(struct net_device *dev,
				struct canxl_frame **cxl,
				unsigned int data_len);
struct sk_buff *alloc_can_err_skb(struct net_device *dev,
				  struct can_frame **cf);
bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb);

/*
 * The struct can_skb_priv is used to transport additional information along
 * with the stored struct can(fd)_frame that can not be contained in existing
 * struct sk_buff elements.
 * N.B. that this information must not be modified in cloned CAN sk_buffs.
 * To modify the CAN frame content or the struct can_skb_priv content
 * skb_copy() needs to be used instead of skb_clone().
 */

/**
 * struct can_skb_priv - private additional data inside CAN sk_buffs
 * @ifindex:	ifindex of the first interface the CAN frame appeared on
 * @skbcnt:	atomic counter to have an unique id together with skb pointer
 * @frame_len:	length of CAN frame in data link layer
 * @cf:		align to the following CAN frame at skb->data
 */
struct can_skb_priv {
	int ifindex;
	int skbcnt;
	unsigned int frame_len;
	struct can_frame cf[];
};

static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb)
{
	return (struct can_skb_priv *)(skb->head);
}

static inline void can_skb_reserve(struct sk_buff *skb)
{
	skb_reserve(skb, sizeof(struct can_skb_priv));
}

static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
{
	/* If the socket has already been closed by user space, the
	 * refcount may already be 0 (and the socket will be freed
	 * after the last TX skb has been freed). So only increase
	 * socket refcount if the refcount is > 0.
	 */
	if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
		skb->destructor = sock_efree;
		skb->sk = sk;
	}
}

/*
 * returns an unshared skb owned by the original sock to be echo'ed back
 */
static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (unlikely(!nskb)) {
		kfree_skb(skb);
		return NULL;
	}

	can_skb_set_owner(nskb, skb->sk);
	consume_skb(skb);
	return nskb;
}

static inline bool can_is_can_skb(const struct sk_buff *skb)
{
	struct can_frame *cf = (struct can_frame *)skb->data;

	/* the CAN specific type of skb is identified by its data length */
	return (skb->len == CAN_MTU && cf->len <= CAN_MAX_DLEN);
}

static inline bool can_is_canfd_skb(const struct sk_buff *skb)
{
	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;

	/* the CAN specific type of skb is identified by its data length */
	return (skb->len == CANFD_MTU && cfd->len <= CANFD_MAX_DLEN);
}

static inline bool can_is_canxl_skb(const struct sk_buff *skb)
{
	const struct canxl_frame *cxl = (struct canxl_frame *)skb->data;

	if (skb->len < CANXL_HDR_SIZE + CANXL_MIN_DLEN || skb->len > CANXL_MTU)
		return false;

	/* this also checks valid CAN XL data length boundaries */
	if (skb->len != CANXL_HDR_SIZE + cxl->len)
		return false;

	return cxl->flags & CANXL_XLF;
}

/* get length element value from can[|fd|xl]_frame structure */
static inline unsigned int can_skb_get_len_val(struct sk_buff *skb)
{
	const struct canxl_frame *cxl = (struct canxl_frame *)skb->data;
	const struct canfd_frame *cfd = (struct canfd_frame *)skb->data;

	if (can_is_canxl_skb(skb))
		return cxl->len;

	return cfd->len;
}

/* get needed data length inside CAN frame for all frame types (RTR aware) */
static inline unsigned int can_skb_get_data_len(struct sk_buff *skb)
{
	unsigned int len = can_skb_get_len_val(skb);
	const struct can_frame *cf = (struct can_frame *)skb->data;

	/* RTR frames have an actual length of zero */
	if (can_is_can_skb(skb) && cf->can_id & CAN_RTR_FLAG)
		return 0;

	return len;
}

#endif /* !_CAN_SKB_H */
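/*
 * Illustrative sketch (editor's note, not part of the original header),
 * showing roughly how a SocketCAN driver's TX path tends to use the echo
 * helpers declared above. The callback names (my_can_start_xmit,
 * my_can_tx_done) and the single echo slot at index 0 are hypothetical
 * simplifications; real drivers size and manage their echo array themselves.
 *
 *	static netdev_tx_t my_can_start_xmit(struct sk_buff *skb,
 *					     struct net_device *dev)
 *	{
 *		unsigned int frame_len = can_skb_get_data_len(skb);
 *
 *		if (can_dropped_invalid_skb(dev, skb))
 *			return NETDEV_TX_OK;
 *
 *		// keep a local echo copy until the controller confirms TX
 *		can_put_echo_skb(skb, dev, 0, frame_len);
 *		// ...hand the frame to the hardware here...
 *		return NETDEV_TX_OK;
 *	}
 *
 *	static void my_can_tx_done(struct net_device *dev)
 *	{
 *		// on TX completion, loop the echoed skb back to local sockets
 *		dev->stats.tx_bytes += can_get_echo_skb(dev, 0, NULL);
 *		dev->stats.tx_packets++;
 *	}
 */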
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/hpfs/map.c
 *
 *  Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999
 *
 *  mapping structures to memory with some minimal checks
 */

#include "hpfs_fn.h"

__le32 *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh)
{
	return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0);
}

__le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
			struct quad_buffer_head *qbh, char *id)
{
	secno sec;
	__le32 *ret;
	unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
	if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) {
		hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
		return NULL;
	}
	sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
	if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
		hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
		return NULL;
	}
	ret = hpfs_map_4sectors(s, sec, qbh, 4);
	if (ret)
		hpfs_prefetch_bitmap(s, bmp_block + 1);
	return ret;
}

void hpfs_prefetch_bitmap(struct super_block *s, unsigned bmp_block)
{
	unsigned to_prefetch, next_prefetch;
	unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
	if (unlikely(bmp_block >= n_bands))
		return;
	to_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
	if (unlikely(bmp_block + 1 >= n_bands))
		next_prefetch = 0;
	else
		next_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block + 1]);
	hpfs_prefetch_sectors(s, to_prefetch, 4 + 4 * (to_prefetch + 4 == next_prefetch));
}

/*
 * Load first code page into kernel memory, return pointer to 256-byte array,
 * first 128 bytes are uppercasing table for chars 128-255, next 128 bytes are
 * lowercasing table
 */

unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
{
	struct buffer_head *bh;
	secno cpds;
	unsigned cpi;
	unsigned char *ptr;
	unsigned char *cp_table;
	int i;
	struct code_page_data *cpd;
	struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
	if (!cp) return NULL;
	if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
		pr_err("Code page directory magic doesn't match (magic = %08x)\n",
			le32_to_cpu(cp->magic));
		brelse(bh);
		return NULL;
	}
	if (!le32_to_cpu(cp->n_code_pages)) {
		pr_err("n_code_pages == 0\n");
		brelse(bh);
		return NULL;
	}
	cpds = le32_to_cpu(cp->array[0].code_page_data);
	cpi = le16_to_cpu(cp->array[0].index);
	brelse(bh);

	if (cpi >= 3) {
		pr_err("Code page index out of array\n");
		return NULL;
	}

	if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL;
	if (le16_to_cpu(cpd->offs[cpi]) > 0x178) {
		pr_err("Code page index out of sector\n");
		brelse(bh);
		return NULL;
	}
	ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6;
	if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
		pr_err("out of memory for code page table\n");
		brelse(bh);
		return NULL;
	}
	memcpy(cp_table, ptr, 128);
	brelse(bh);

	/* Try to build lowercasing table from uppercasing one */

	for (i=128; i<256; i++) cp_table[i]=i;
	for (i=128; i<256; i++) if (cp_table[i-128]!=i && cp_table[i-128]>=128)
		cp_table[cp_table[i-128]] = i;

	return cp_table;
}

__le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp)
{
	struct buffer_head *bh;
	int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21;
	int i;
	__le32 *b;
	if (!(b = kmalloc_array(n, 512, GFP_KERNEL))) {
		pr_err("can't allocate memory for bitmap directory\n");
		return NULL;
	}
	for (i=0;i<n;i++) {
		__le32 *d = hpfs_map_sector(s, bmp+i, &bh, n - i - 1);
		if (!d) {
			kfree(b);
			return NULL;
		}
		memcpy((char *)b + 512 * i, d, 512);
		brelse(bh);
	}
	return b;
}

void hpfs_load_hotfix_map(struct super_block *s, struct hpfs_spare_block *spareblock)
{
	struct quad_buffer_head qbh;
	__le32 *directory;
	u32 n_hotfixes, n_used_hotfixes;
	unsigned i;

	n_hotfixes = le32_to_cpu(spareblock->n_spares);
	n_used_hotfixes = le32_to_cpu(spareblock->n_spares_used);

	if (n_hotfixes > 256 || n_used_hotfixes > n_hotfixes) {
		hpfs_error(s, "invalid number of hotfixes: %u, used: %u", n_hotfixes, n_used_hotfixes);
		return;
	}
	if (!(directory = hpfs_map_4sectors(s, le32_to_cpu(spareblock->hotfix_map), &qbh, 0))) {
		hpfs_error(s, "can't load hotfix map");
		return;
	}
	for (i = 0; i < n_used_hotfixes; i++) {
		hpfs_sb(s)->hotfix_from[i] = le32_to_cpu(directory[i]);
		hpfs_sb(s)->hotfix_to[i] = le32_to_cpu(directory[n_hotfixes + i]);
	}
	hpfs_sb(s)->n_hotfixes = n_used_hotfixes;
	hpfs_brelse4(&qbh);
}

/*
 * Load fnode to memory
 */

struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_head **bhp)
{
	struct fnode *fnode;
	if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ino, 1, "fnode")) {
		return NULL;
	}
	if ((fnode = hpfs_map_sector(s, ino, bhp, FNODE_RD_AHEAD))) {
		if (hpfs_sb(s)->sb_chk) {
			struct extended_attribute *ea;
			struct extended_attribute *ea_end;
			if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) {
				hpfs_error(s, "bad magic on fnode %08lx", (unsigned long)ino);
				goto bail;
			}
			if (!fnode_is_dir(fnode)) {
				if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes !=
				    (bp_internal(&fnode->btree) ? 12 : 8)) {
					hpfs_error(s,
					   "bad number of nodes in fnode %08lx",
					    (unsigned long)ino);
					goto bail;
				}
				if (le16_to_cpu(fnode->btree.first_free) !=
				    8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) {
					hpfs_error(s,
					    "bad first_free pointer in fnode %08lx",
					    (unsigned long)ino);
					goto bail;
				}
			}
			if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 ||
			   le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) {
				hpfs_error(s,
					"bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
					(unsigned long)ino,
					le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
				goto bail;
			}
			ea = fnode_ea(fnode);
			ea_end = fnode_end_ea(fnode);
			while (ea != ea_end) {
				if (ea > ea_end) {
					hpfs_error(s, "bad EA in fnode %08lx",
						(unsigned long)ino);
					goto bail;
				}
				ea = next_ea(ea);
			}
		}
	}
	return fnode;
	bail:
	brelse(*bhp);
	return NULL;
}

struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buffer_head **bhp)
{
	struct anode *anode;
	if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL;
	if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD)))
		if (hpfs_sb(s)->sb_chk) {
			if (le32_to_cpu(anode->magic) != ANODE_MAGIC) {
				hpfs_error(s, "bad magic on anode %08x", ano);
				goto bail;
			}
			if (le32_to_cpu(anode->self) != ano) {
				hpfs_error(s, "self pointer invalid on anode %08x", ano);
				goto bail;
			}
			if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes !=
			    (bp_internal(&anode->btree) ? 60 : 40)) {
				hpfs_error(s, "bad number of nodes in anode %08x", ano);
				goto bail;
			}
			if (le16_to_cpu(anode->btree.first_free) !=
			    8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) {
				hpfs_error(s, "bad first_free pointer in anode %08x", ano);
				goto bail;
			}
		}
	return anode;
	bail:
	brelse(*bhp);
	return NULL;
}

/*
 * Load dnode to memory and do some checks
 */

struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
			     struct quad_buffer_head *qbh)
{
	struct dnode *dnode;
	if (hpfs_sb(s)->sb_chk) {
		if (hpfs_chk_sectors(s, secno, 4, "dnode")) return NULL;
		if (secno & 3) {
			hpfs_error(s, "dnode %08x not byte-aligned", secno);
			return NULL;
		}
	}
	if ((dnode = hpfs_map_4sectors(s, secno, qbh, DNODE_RD_AHEAD)))
		if (hpfs_sb(s)->sb_chk) {
			unsigned p, pp = 0;
			unsigned char *d = (unsigned char *)dnode;
			int b = 0;
			if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) {
				hpfs_error(s, "bad magic on dnode %08x", secno);
				goto bail;
			}
			if (le32_to_cpu(dnode->self) != secno)
				hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self));
			/* Check dirents - bad dirents would cause infinite
			   loops or shooting to memory */
			if (le32_to_cpu(dnode->first_free) > 2048) {
				hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free));
				goto bail;
			}
			for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) {
				struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p);
				if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) {
					hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
					goto bail;
				}
				if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
					if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok;
					hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
					goto bail;
				}
				ok:
				if (hpfs_sb(s)->sb_chk >= 2) b |= 1 << de->down;
				if (de->down) if (de_down_pointer(de) < 0x10) {
					hpfs_error(s, "bad down pointer in dnode %08x, dirent %03x, last %03x", secno, p, pp);
					goto bail;
				}
				pp = p;
			}
			if (p != le32_to_cpu(dnode->first_free)) {
				hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno);
				goto bail;
			}
			if (d[pp + 30] != 1 || d[pp + 31] != 255) {
				hpfs_error(s, "dnode %08x does not end with \\377 entry", secno);
				goto bail;
			}
			if (b == 3)
				pr_err("unbalanced dnode tree, dnode %08x; see hpfs.txt 4 more info\n",
					secno);
		}
	return dnode;
	bail:
	hpfs_brelse4(qbh);
	return NULL;
}

dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino)
{
	struct buffer_head *bh;
	struct fnode *fnode;
	dnode_secno dno;

	fnode = hpfs_map_fnode(s, ino, &bh);
	if (!fnode)
		return 0;

	dno = le32_to_cpu(fnode->u.external[0].disk_secno);
	brelse(bh);
	return dno;
}
// SPDX-License-Identifier: LGPL-2.1
/*
 *   SPNEGO upcall management for CIFS
 *
 *   Copyright (c) 2007 Red Hat, Inc.
 *   Author(s): Jeff Layton (jlayton@redhat.com)
 *
 */

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <keys/user-type.h>
#include <linux/key-type.h>
#include <linux/keyctl.h>
#include <linux/inet.h>
#include "cifsglob.h"
#include "cifs_spnego.h"
#include "cifs_debug.h"
#include "cifsproto.h"
static const struct cred *spnego_cred;

/* create a new cifs key */
static int
cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
{
	char *payload;
	int ret;

	ret = -ENOMEM;
	payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL);
	if (!payload)
		goto error;

	/* attach the data */
	key->payload.data[0] = payload;
	ret = 0;

error:
	return ret;
}

static void
cifs_spnego_key_destroy(struct key *key)
{
	kfree(key->payload.data[0]);
}

/*
 * keytype for CIFS spnego keys
 */
struct key_type cifs_spnego_key_type = {
	.name		= "cifs.spnego",
	.instantiate	= cifs_spnego_key_instantiate,
	.destroy	= cifs_spnego_key_destroy,
	.describe	= user_describe,
};

/* length of longest version string e.g.  strlen("ver=0xFF") */
#define MAX_VER_STR_LEN		8

/* length of longest security mechanism name, eg in future could have
 * strlen(";sec=ntlmsspi") */
#define MAX_MECH_STR_LEN	13

/* strlen of ";host=" */
#define HOST_KEY_LEN		6

/* strlen of ";ip4=" or ";ip6=" */
#define IP_KEY_LEN		5

/* strlen of ";uid=0x" */
#define UID_KEY_LEN		7

/* strlen of ";creduid=0x" */
#define CREDUID_KEY_LEN		11

/* strlen of ";user=" */
#define USER_KEY_LEN		6

/* strlen of ";pid=0x" */
#define PID_KEY_LEN		7

/* get a key struct with a SPNEGO security blob, suitable for session setup */
struct key *
cifs_get_spnego_key(struct cifs_ses *sesInfo,
		    struct TCP_Server_Info *server)
{
	struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
	char *description, *dp;
	size_t desc_len;
	struct key *spnego_key;
	const char *hostname = server->hostname;
	const struct cred *saved_cred;

	/* length of fields (with semicolons): ver=0xyz ip4=ipaddress
	   host=hostname sec=mechanism uid=0xFF user=username */
	desc_len = MAX_VER_STR_LEN +
		   HOST_KEY_LEN + strlen(hostname) +
		   IP_KEY_LEN + INET6_ADDRSTRLEN +
		   MAX_MECH_STR_LEN +
		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
		   CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
		   PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;

	if (sesInfo->user_name)
		desc_len += USER_KEY_LEN + strlen(sesInfo->user_name);

	spnego_key = ERR_PTR(-ENOMEM);
	description = kzalloc(desc_len, GFP_KERNEL);
	if (description == NULL)
		goto out;

	dp = description;
	/* start with version and hostname portion of UNC string */
	spnego_key = ERR_PTR(-EINVAL);
	sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION,
		hostname);
	dp = description + strlen(description);

	/* add the server address */
	if (server->dstaddr.ss_family == AF_INET)
		sprintf(dp, "ip4=%pI4", &sa->sin_addr);
	else if (server->dstaddr.ss_family == AF_INET6)
		sprintf(dp, "ip6=%pI6", &sa6->sin6_addr);
	else
		goto out;

	dp = description + strlen(description);

	/* for now, only sec=krb5 and sec=mskrb5 are valid */
	if (server->sec_kerberos)
		sprintf(dp, ";sec=krb5");
	else if (server->sec_mskerberos)
		sprintf(dp, ";sec=mskrb5");
	else {
		cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
		sprintf(dp, ";sec=krb5");
	}

	dp = description + strlen(description);
	sprintf(dp, ";uid=0x%x",
		from_kuid_munged(&init_user_ns, sesInfo->linux_uid));

	dp = description + strlen(description);
	sprintf(dp, ";creduid=0x%x",
		from_kuid_munged(&init_user_ns, sesInfo->cred_uid));

	if (sesInfo->user_name) {
		dp = description + strlen(description);
		sprintf(dp, ";user=%s", sesInfo->user_name);
	}

	dp = description + strlen(description);
	sprintf(dp, ";pid=0x%x", current->pid);

	cifs_dbg(FYI, "key description = %s\n", description);
	saved_cred = override_creds(spnego_cred);
	spnego_key = request_key(&cifs_spnego_key_type, description, "");
	revert_creds(saved_cred);

#ifdef CONFIG_CIFS_DEBUG2
	if (cifsFYI && !IS_ERR(spnego_key)) {
		struct cifs_spnego_msg *msg = spnego_key->payload.data[0];
		cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024U,
				msg->secblob_len + msg->sesskey_len));
	}
#endif /* CONFIG_CIFS_DEBUG2 */

out:
	kfree(description);
	return spnego_key;
}

int
init_cifs_spnego(void)
{
	struct cred *cred;
	struct key *keyring;
	int ret;

	cifs_dbg(FYI, "Registering the %s key type\n",
		 cifs_spnego_key_type.name);

	/*
	 * Create an override credential set with special thread keyring for
	 * spnego upcalls.
	 */

	cred = prepare_kernel_cred(&init_task);
	if (!cred)
		return -ENOMEM;

	keyring = keyring_alloc(".cifs_spnego",
				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
				KEY_USR_VIEW | KEY_USR_READ,
				KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
	if (IS_ERR(keyring)) {
		ret = PTR_ERR(keyring);
		goto failed_put_cred;
	}

	ret = register_key_type(&cifs_spnego_key_type);
	if (ret < 0)
		goto failed_put_key;

	/*
	 * instruct request_key() to use this special keyring as a cache for
	 * the results it looks up
	 */
	set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
	cred->thread_keyring = keyring;
	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
	spnego_cred = cred;

	cifs_dbg(FYI, "cifs spnego keyring: %d\n", key_serial(keyring));
	return 0;

failed_put_key:
	key_put(keyring);
failed_put_cred:
	put_cred(cred);
	return ret;
}

void
exit_cifs_spnego(void)
{
	key_revoke(spnego_cred->thread_keyring);
	unregister_key_type(&cifs_spnego_key_type);
	put_cred(spnego_cred);
	cifs_dbg(FYI, "Unregistered %s key type\n", cifs_spnego_key_type.name);
}
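/*
 * Illustrative sketch (editor's note, not part of the original file): the
 * description assembled in cifs_get_spnego_key() ends up as a single line of
 * ';'-separated fields, for example (all values below are made up; the
 * version number comes from CIFS_SPNEGO_UPCALL_VERSION):
 *
 *	ver=0x2;host=fileserver;ip4=192.0.2.10;sec=krb5;uid=0x3e8;creduid=0x3e8;user=alice;pid=0x1a4
 *
 * A user-space helper such as cifs.upcall parses these fields, obtains the
 * Kerberos ticket, and instantiates the "cifs.spnego" key with the resulting
 * SPNEGO blob, which cifs_spnego_key_instantiate() then copies into the key
 * payload.
 */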
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * ocfs2.h * * Defines macros and structures used in OCFS2 * * Copyright (C) 2002, 2004 Oracle. All rights reserved. */ #ifndef OCFS2_H #define OCFS2_H #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/llist.h> #include <linux/rbtree.h> #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/mutex.h> #include <linux/lockdep.h> #include <linux/jbd2.h> /* For union ocfs2_dlm_lksb */ #include "stackglue.h" #include "ocfs2_fs.h" #include "ocfs2_lockid.h" #include "ocfs2_ioctl.h" /* For struct ocfs2_blockcheck_stats */ #include "blockcheck.h" #include "reservations.h" #include "filecheck.h" /* Caching of metadata buffers */ /* Most user visible OCFS2 inodes will have very few pieces of * metadata, but larger files (including bitmaps, etc) must be taken * into account when designing an access scheme. We allow a small * amount of inlined blocks to be stored on an array and grow the * structure into a rb tree when necessary. */ #define OCFS2_CACHE_INFO_MAX_ARRAY 2 /* Flags for ocfs2_caching_info */ enum ocfs2_caching_info_flags { /* Indicates that the metadata cache is using the inline array */ OCFS2_CACHE_FL_INLINE = 1<<1, }; struct ocfs2_caching_operations; struct ocfs2_caching_info { /* * The parent structure provides the locks, but because the * parent structure can differ, it provides locking operations * to struct ocfs2_caching_info. */ const struct ocfs2_caching_operations *ci_ops; /* next two are protected by trans_inc_lock */ /* which transaction were we created on? Zero if none. */ unsigned long ci_created_trans; /* last transaction we were a part of. */ unsigned long ci_last_trans; /* Cache structures */ unsigned int ci_flags; unsigned int ci_num_cached; union { sector_t ci_array[OCFS2_CACHE_INFO_MAX_ARRAY]; struct rb_root ci_tree; } ci_cache; }; /* * Need this prototype here instead of in uptodate.h because journal.h * uses it. */ struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci); /* this limits us to 256 nodes * if we need more, we can do a kmalloc for the map */ #define OCFS2_NODE_MAP_MAX_NODES 256 struct ocfs2_node_map { u16 num_nodes; unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)]; }; enum ocfs2_ast_action { OCFS2_AST_INVALID = 0, OCFS2_AST_ATTACH, OCFS2_AST_CONVERT, OCFS2_AST_DOWNCONVERT, }; /* actions for an unlockast function to take. */ enum ocfs2_unlock_action { OCFS2_UNLOCK_INVALID = 0, OCFS2_UNLOCK_CANCEL_CONVERT, OCFS2_UNLOCK_DROP_LOCK, }; /* ocfs2_lock_res->l_flags flags. */ #define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ #define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to * downconvert*/ #define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */ #define OCFS2_LOCK_NEEDS_REFRESH (0x00000010) #define OCFS2_LOCK_REFRESHING (0x00000020) #define OCFS2_LOCK_INITIALIZED (0x00000040) /* track initialization * for shutdown paths */ #define OCFS2_LOCK_FREEING (0x00000080) /* help dlmglue track * when to skip queueing * a lock because it's * about to be * dropped. 
*/ #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a call to dlm_lock. Only exists with BUSY set. */ #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread * from downconverting * before the upconvert * has completed */ #define OCFS2_LOCK_NONBLOCK_FINISHED (0x00001000) /* NONBLOCK cluster * lock has already * returned, do not block * dc thread from * downconverting */ struct ocfs2_lock_res_ops; typedef void (*ocfs2_lock_callback)(int status, unsigned long data); #ifdef CONFIG_OCFS2_FS_STATS struct ocfs2_lock_stats { u64 ls_total; /* Total wait in NSEC */ u32 ls_gets; /* Num acquires */ u32 ls_fail; /* Num failed acquires */ /* Storing max wait in usecs saves 24 bytes per inode */ u32 ls_max; /* Max wait in USEC */ u64 ls_last; /* Last unlock time in USEC */ }; #endif struct ocfs2_lock_res { void *l_priv; const struct ocfs2_lock_res_ops *l_ops; struct list_head l_blocked_list; struct list_head l_mask_waiters; struct list_head l_holders; unsigned long l_flags; char l_name[OCFS2_LOCK_ID_MAX_LEN]; unsigned int l_ro_holders; unsigned int l_ex_holders; signed char l_level; signed char l_requested; signed char l_blocking; /* Data packed - type enum ocfs2_lock_type */ unsigned char l_type; /* used from AST/BAST funcs. */ /* Data packed - enum type ocfs2_ast_action */ unsigned char l_action; /* Data packed - enum type ocfs2_unlock_action */ unsigned char l_unlock_action; unsigned int l_pending_gen; spinlock_t l_lock; struct ocfs2_dlm_lksb l_lksb; wait_queue_head_t l_event; struct list_head l_debug_list; #ifdef CONFIG_OCFS2_FS_STATS struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */ u32 l_lock_refresh; /* Disk refreshes */ u64 l_lock_wait; /* First lock wait time */ struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map l_lockdep_map; #endif }; enum ocfs2_orphan_reco_type { ORPHAN_NO_NEED_TRUNCATE = 0, ORPHAN_NEED_TRUNCATE, }; enum ocfs2_orphan_scan_state { ORPHAN_SCAN_ACTIVE, ORPHAN_SCAN_INACTIVE }; struct ocfs2_orphan_scan { struct mutex os_lock; struct ocfs2_super *os_osb; struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ struct delayed_work os_orphan_scan_work; time64_t os_scantime; /* time this node ran the scan */ u32 os_count; /* tracks node specific scans */ u32 os_seqno; /* tracks cluster wide scans */ atomic_t os_state; /* ACTIVE or INACTIVE */ }; struct ocfs2_dlm_debug { struct kref d_refcnt; u32 d_filter_secs; struct list_head d_lockres_tracking; }; enum ocfs2_vol_state { VOLUME_INIT = 0, VOLUME_MOUNTED, VOLUME_MOUNTED_QUOTAS, VOLUME_DISMOUNTED, VOLUME_DISABLED }; struct ocfs2_alloc_stats { atomic_t moves; atomic_t local_data; atomic_t bitmap_data; atomic_t bg_allocs; atomic_t bg_extends; }; enum ocfs2_local_alloc_state { OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for * this mountpoint. */ OCFS2_LA_ENABLED, /* Local alloc is in use. */ OCFS2_LA_THROTTLED, /* Local alloc is in use, but number * of bits has been reduced. */ OCFS2_LA_DISABLED /* Local alloc has temporarily been * disabled. 
*/ }; enum ocfs2_mount_options { OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */ OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access control lists */ OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT writes */ OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ }; #define OCFS2_OSB_SOFT_RO 0x0001 #define OCFS2_OSB_HARD_RO 0x0002 #define OCFS2_OSB_ERROR_FS 0x0004 #define OCFS2_DEFAULT_ATIME_QUANTUM 60 struct ocfs2_triggers { struct jbd2_buffer_trigger_type ot_triggers; int ot_offset; struct super_block *sb; }; enum ocfs2_journal_trigger_type { OCFS2_JTR_DI, OCFS2_JTR_EB, OCFS2_JTR_RB, OCFS2_JTR_GD, OCFS2_JTR_DB, OCFS2_JTR_XB, OCFS2_JTR_DQ, OCFS2_JTR_DR, OCFS2_JTR_DL, OCFS2_JTR_NONE /* This must be the last entry */ }; #define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; struct ocfs2_replay_map; struct ocfs2_quota_recovery; struct ocfs2_super { struct task_struct *commit_task; struct super_block *sb; struct inode *root_inode; struct inode *sys_root_inode; struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; struct inode **local_system_inodes; struct ocfs2_slot_info *slot_info; u32 *slot_recovery_generations; spinlock_t node_map_lock; u64 root_blkno; u64 system_dir_blkno; u64 bitmap_blkno; u32 bitmap_cpg; char *uuid_str; u32 uuid_hash; u8 *vol_label; u64 first_cluster_group_blkno; u32 fs_generation; u32 s_feature_compat; u32 s_feature_incompat; u32 s_feature_ro_compat; /* Protects s_next_generation, osb_flags and s_inode_steal_slot. * Could protect more on osb as it's very short lived. */ spinlock_t osb_lock; u32 s_next_generation; unsigned long osb_flags; u16 s_inode_steal_slot; u16 s_meta_steal_slot; atomic_t s_num_inodes_stolen; atomic_t s_num_meta_stolen; unsigned long s_mount_opt; unsigned int s_atime_quantum; unsigned int max_slots; unsigned int node_num; int slot_num; int preferred_slot; int s_sectsize_bits; int s_clustersize; int s_clustersize_bits; unsigned int s_xattr_inline_size; atomic_t vol_state; struct mutex recovery_lock; struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; int disable_recovery; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; /* Journal triggers for checksum */ struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT]; struct delayed_work la_enable_wq; /* * Must hold local alloc i_rwsem and osb->osb_lock to change * local_alloc_bits. 
Reads can be done under either lock. */ unsigned int local_alloc_bits; unsigned int local_alloc_default_bits; /* osb_clusters_at_boot can become stale! Do not trust it to * be up to date. */ unsigned int osb_clusters_at_boot; enum ocfs2_local_alloc_state local_alloc_state; /* protected * by osb_lock */ struct buffer_head *local_alloc_bh; u64 la_last_gd; struct ocfs2_reservation_map osb_la_resmap; unsigned int osb_resv_level; unsigned int osb_dir_resv_level; /* Next two fields are for local node slot recovery during * mount. */ struct ocfs2_dinode *local_alloc_copy; struct ocfs2_quota_recovery *quota_rec; struct ocfs2_blockcheck_stats osb_ecc_stats; struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ u8 osb_stackflags; char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1]; struct ocfs2_cluster_connection *cconn; struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_nfs_sync_lockres; struct rw_semaphore nfs_sync_rwlock; struct ocfs2_lock_res osb_trim_fs_lockres; struct mutex obs_trim_fs_mutex; struct ocfs2_dlm_debug *osb_dlm_debug; struct dentry *osb_debug_root; wait_queue_head_t recovery_event; spinlock_t dc_task_lock; struct task_struct *dc_task; wait_queue_head_t dc_event; unsigned long dc_wake_sequence; unsigned long dc_work_sequence; /* * Any thread can add locks to the list, but the downconvert * thread is the only one allowed to remove locks. Any change * to this rule requires updating * ocfs2_downconvert_thread_do_work(). */ struct list_head blocked_lock_list; unsigned long blocked_lock_count; /* List of dquot structures to drop last reference to */ struct llist_head dquot_drop_list; struct work_struct dquot_drop_work; wait_queue_head_t osb_mount_event; /* Truncate log info */ struct inode *osb_tl_inode; struct buffer_head *osb_tl_bh; struct delayed_work osb_truncate_log_wq; atomic_t osb_tl_disable; /* * How many clusters in our truncate log. * It must be protected by osb_tl_inode->i_rwsem. */ unsigned int truncated_clusters; struct ocfs2_node_map osb_recovering_orphan_dirs; unsigned int *osb_orphan_wipes; wait_queue_head_t osb_wipe_event; struct ocfs2_orphan_scan osb_orphan_scan; /* used to protect metaecc calculation check of xattr. */ spinlock_t osb_xattr_lock; unsigned int osb_dx_mask; u32 osb_dx_seed[4]; /* the group we used to allocate inodes. */ u64 osb_inode_alloc_group; /* rb tree root for refcount lock. */ struct rb_root osb_rf_lock_tree; struct ocfs2_refcount_tree *osb_ref_tree_lru; struct mutex system_file_mutex; /* * OCFS2 needs to schedule several different types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these * types of work tend to be heavy we avoid using the kernel events * workqueue and schedule on our own. 
*/ struct workqueue_struct *ocfs2_wq; /* sysfs directory per partition */ struct kset *osb_dev_kset; /* file check related stuff */ struct ocfs2_filecheck_sysfs_entry osb_fc_ent; }; #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) /* Useful typedef for passing around journal access functions */ typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type); static inline int ocfs2_should_order_data(struct inode *inode) { if (!S_ISREG(inode->i_mode)) return 0; if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) return 0; return 1; } static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) return 1; return 0; } static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) { /* * Support for sparse files is a pre-requisite */ if (!ocfs2_sparse_alloc(osb)) return 0; if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) return 1; return 0; } static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO) return 1; return 0; } static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) return 1; return 0; } static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) return 1; return 0; } static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC) return 1; return 0; } static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) return 1; return 0; } static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) return 1; return 0; } static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) { if (ocfs2_supports_indexed_dirs(osb)) return OCFS2_DX_LINK_MAX; return OCFS2_LINK_MAX; } static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) { u32 nlink = le16_to_cpu(di->i_links_count); u32 hi = le16_to_cpu(di->i_links_count_hi); nlink |= (hi << OCFS2_LINKS_HI_SHIFT); return nlink; } static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink) { u16 lo, hi; lo = nlink; hi = nlink >> OCFS2_LINKS_HI_SHIFT; di->i_links_count = cpu_to_le16(lo); di->i_links_count_hi = cpu_to_le16(hi); } static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n) { u32 links = ocfs2_read_links_count(di); links += n; ocfs2_set_links_count(di, links); } static inline int ocfs2_refcount_tree(struct ocfs2_super *osb) { if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) return 1; return 0; } /* set / clear functions because cluster events can make these happen * in parallel so we want the transitions to be atomic. this also * means that any future flags osb_flags must be protected by spinlock * too! 
*/ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, unsigned long flag) { spin_lock(&osb->osb_lock); osb->osb_flags |= flag; spin_unlock(&osb->osb_lock); } static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, int hard) { spin_lock(&osb->osb_lock); osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO); if (hard) osb->osb_flags |= OCFS2_OSB_HARD_RO; else osb->osb_flags |= OCFS2_OSB_SOFT_RO; spin_unlock(&osb->osb_lock); } static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb) { int ret; spin_lock(&osb->osb_lock); ret = osb->osb_flags & OCFS2_OSB_HARD_RO; spin_unlock(&osb->osb_lock); return ret; } static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) { int ret; spin_lock(&osb->osb_lock); ret = osb->osb_flags & OCFS2_OSB_SOFT_RO; spin_unlock(&osb->osb_lock); return ret; } static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) { return (osb->s_feature_incompat & (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); } static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) { if (ocfs2_clusterinfo_valid(osb) && memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, OCFS2_STACK_LABEL_LEN)) return 1; return 0; } static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) { if (ocfs2_clusterinfo_valid(osb) && !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, OCFS2_STACK_LABEL_LEN)) return 1; return 0; } static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) { return ocfs2_o2cb_stack(osb) && (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); } static inline int ocfs2_mount_local(struct ocfs2_super *osb) { return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); } static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) { return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); } #define OCFS2_IS_VALID_DINODE(ptr) \ (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) #define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) #define OCFS2_IS_VALID_DX_ROOT(ptr) \ (!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE)) #define OCFS2_IS_VALID_DX_LEAF(ptr) \ (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) #define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr) \ (!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE)) static inline unsigned long ino_from_blkno(struct super_block *sb, u64 blkno) { return (unsigned long)(blkno & (u64)ULONG_MAX); } static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb, u32 clusters) { int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; return (u64)clusters << c_to_b_bits; } static inline u32 ocfs2_clusters_for_blocks(struct super_block *sb, u64 blocks) { int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; blocks += (1 << b_to_c_bits) - 1; return (u32)(blocks >> b_to_c_bits); } static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb, u64 blocks) { int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; return (u32)(blocks >> b_to_c_bits); } static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, u64 bytes) { int cl_bits = 
OCFS2_SB(sb)->s_clustersize_bits; unsigned int clusters; bytes += OCFS2_SB(sb)->s_clustersize - 1; /* OCFS2 just cannot have enough clusters to overflow this */ clusters = (unsigned int)(bytes >> cl_bits); return clusters; } static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb, u64 bytes) { int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; unsigned int clusters; clusters = (unsigned int)(bytes >> cl_bits); return clusters; } static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, u64 bytes) { bytes += sb->s_blocksize - 1; return bytes >> sb->s_blocksize_bits; } static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, u32 clusters) { return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; } static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb, u64 blocks) { int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; unsigned int clusters; clusters = ocfs2_blocks_to_clusters(sb, blocks); return (u64)clusters << bits; } static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, u64 bytes) { int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; unsigned int clusters; clusters = ocfs2_clusters_for_bytes(sb, bytes); return (u64)clusters << cl_bits; } static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb, u64 bytes) { u64 blocks; blocks = ocfs2_blocks_for_bytes(sb, bytes); return blocks << sb->s_blocksize_bits; } static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes) { return (unsigned long)((bytes + 511) >> 9); } static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb, unsigned long pg_index) { u32 clusters = pg_index; unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; if (unlikely(PAGE_SHIFT > cbits)) clusters = pg_index << (PAGE_SHIFT - cbits); else if (PAGE_SHIFT < cbits) clusters = pg_index >> (cbits - PAGE_SHIFT); return clusters; } /* * Find the 1st page index which covers the given clusters. 
*/ static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb, u32 clusters) { unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; pgoff_t index = clusters; if (PAGE_SHIFT > cbits) { index = (pgoff_t)clusters >> (PAGE_SHIFT - cbits); } else if (PAGE_SHIFT < cbits) { index = (pgoff_t)clusters << (cbits - PAGE_SHIFT); } return index; } static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) { unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; unsigned int pages_per_cluster = 1; if (PAGE_SHIFT < cbits) pages_per_cluster = 1 << (cbits - PAGE_SHIFT); return pages_per_cluster; } static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, unsigned int megs) { BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); } static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, unsigned int clusters) { return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits); } static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) { __set_bit_le(bit, bitmap); } #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) { __clear_bit_le(bit, bitmap); } #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) #define ocfs2_test_bit test_bit_le #define ocfs2_find_next_zero_bit find_next_zero_bit_le #define ocfs2_find_next_bit find_next_bit_le static inline void *correct_addr_and_bit_unaligned(int *bit, void *addr) { #if BITS_PER_LONG == 64 *bit += ((unsigned long) addr & 7UL) << 3; addr = (void *) ((unsigned long) addr & ~7UL); #elif BITS_PER_LONG == 32 *bit += ((unsigned long) addr & 3UL) << 3; addr = (void *) ((unsigned long) addr & ~3UL); #else #error "how many bits you are?!" #endif return addr; } static inline void ocfs2_set_bit_unaligned(int bit, void *bitmap) { bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); ocfs2_set_bit(bit, bitmap); } static inline void ocfs2_clear_bit_unaligned(int bit, void *bitmap) { bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); ocfs2_clear_bit(bit, bitmap); } static inline int ocfs2_test_bit_unaligned(int bit, void *bitmap) { bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); return ocfs2_test_bit(bit, bitmap); } static inline int ocfs2_find_next_zero_bit_unaligned(void *bitmap, int max, int start) { int fix = 0, ret, tmpmax; bitmap = correct_addr_and_bit_unaligned(&fix, bitmap); tmpmax = max + fix; start += fix; ret = ocfs2_find_next_zero_bit(bitmap, tmpmax, start) - fix; if (ret > max) return max; return ret; } #endif /* OCFS2_H */ |
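/*
 * Worked example (editor's note, not part of the original header): the
 * cluster/block conversion helpers above are pure shifts. With 4 KiB blocks
 * (s_blocksize_bits = 12) and 64 KiB clusters (s_clustersize_bits = 16),
 * c_to_b_bits = 4, so ocfs2_clusters_to_blocks(sb, 10) = 10 << 4 = 160
 * blocks, and ocfs2_megabytes_to_clusters(sb, 1) = 1 << (20 - 16) = 16
 * clusters, i.e. 16 clusters of 64 KiB per megabyte.
 */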
18 28 15 6 62 53 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM xdp #if !defined(_TRACE_XDP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_XDP_H #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/tracepoint.h> #include <linux/bpf.h> #include <net/xdp.h> #define __XDP_ACT_MAP(FN) \ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ FN(TX) \ FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); #define __XDP_ACT_SYM_FN(x) \ { XDP_##x, #x }, #define __XDP_ACT_SYM_TAB \ __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, NULL } __XDP_ACT_MAP(__XDP_ACT_TP_FN) TRACE_EVENT(xdp_exception, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, u32 act), TP_ARGS(dev, xdp, act), TP_STRUCT__entry( __field(int, prog_id) __field(u32, act) __field(int, ifindex) ), TP_fast_assign( __entry->prog_id = xdp->aux->id; __entry->act = act; __entry->ifindex = dev->ifindex; ), TP_printk("prog_id=%d action=%s ifindex=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex) ); TRACE_EVENT(xdp_bulk_tx, TP_PROTO(const struct net_device *dev, int sent, int drops, int err), TP_ARGS(dev, sent, drops, err), TP_STRUCT__entry( __field(int, ifindex) __field(u32, act) __field(int, drops) __field(int, sent) __field(int, err) ), TP_fast_assign( __entry->ifindex = dev->ifindex; __entry->act = XDP_TX; __entry->drops = drops; __entry->sent = sent; __entry->err = err; ), TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", __entry->ifindex, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, __entry->err) ); #ifndef __DEVMAP_OBJ_TYPE #define __DEVMAP_OBJ_TYPE struct _bpf_dtab_netdev { struct net_device *dev; }; #endif /* __DEVMAP_OBJ_TYPE */ DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), 
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index), TP_STRUCT__entry( __field(int, prog_id) __field(u32, act) __field(int, ifindex) __field(int, err) __field(int, to_ifindex) __field(u32, map_id) __field(int, map_index) ), TP_fast_assign( u32 ifindex = 0, map_index = index; if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) { /* Just leave to_ifindex to 0 if do broadcast redirect, * as tgt will be NULL. */ if (tgt) ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex; } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { ifindex = index; map_index = 0; } __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; __entry->to_ifindex = ifindex; __entry->map_id = map_id; __entry->map_index = map_index; ), TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" " map_id=%d map_index=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, __entry->err, __entry->map_id, __entry->map_index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); #define _trace_xdp_redirect(dev, xdp, to) \ trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) #define _trace_xdp_redirect_err(dev, xdp, to, err) \ trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) #define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \ trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index) #define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \ trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) /* not used anymore, but kept around so as not to break old programs */ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, enum bpf_map_type map_type, u32 map_id, u32 index), TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); TRACE_EVENT(xdp_cpumap_kthread, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats), TP_ARGS(map_id, processed, drops, sched, xdp_stats), TP_STRUCT__entry( __field(int, map_id) __field(u32, act) __field(int, cpu) __field(unsigned int, drops) __field(unsigned int, processed) __field(int, sched) __field(unsigned int, xdp_pass) __field(unsigned int, xdp_drop) __field(unsigned int, xdp_redirect) ), TP_fast_assign( __entry->map_id = map_id; __entry->act = XDP_REDIRECT; __entry->cpu = smp_processor_id(); __entry->drops = drops; __entry->processed = processed; __entry->sched = sched; __entry->xdp_pass = xdp_stats->pass; __entry->xdp_drop = xdp_stats->drop; __entry->xdp_redirect = xdp_stats->redirect; ), TP_printk("kthread" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " sched=%d" " 
xdp_pass=%u xdp_drop=%u xdp_redirect=%u", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->sched, __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect) ); TRACE_EVENT(xdp_cpumap_enqueue, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, int to_cpu), TP_ARGS(map_id, processed, drops, to_cpu), TP_STRUCT__entry( __field(int, map_id) __field(u32, act) __field(int, cpu) __field(unsigned int, drops) __field(unsigned int, processed) __field(int, to_cpu) ), TP_fast_assign( __entry->map_id = map_id; __entry->act = XDP_REDIRECT; __entry->cpu = smp_processor_id(); __entry->drops = drops; __entry->processed = processed; __entry->to_cpu = to_cpu; ), TP_printk("enqueue" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " to_cpu=%d", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->to_cpu) ); TRACE_EVENT(xdp_devmap_xmit, TP_PROTO(const struct net_device *from_dev, const struct net_device *to_dev, int sent, int drops, int err), TP_ARGS(from_dev, to_dev, sent, drops, err), TP_STRUCT__entry( __field(int, from_ifindex) __field(u32, act) __field(int, to_ifindex) __field(int, drops) __field(int, sent) __field(int, err) ), TP_fast_assign( __entry->from_ifindex = from_dev->ifindex; __entry->act = XDP_REDIRECT; __entry->to_ifindex = to_dev->ifindex; __entry->drops = drops; __entry->sent = sent; __entry->err = err; ), TP_printk("ndo_xdp_xmit" " from_ifindex=%d to_ifindex=%d action=%s" " sent=%d drops=%d" " err=%d", __entry->from_ifindex, __entry->to_ifindex, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, __entry->err) ); /* Expect users already include <net/xdp.h>, but not xdp_priv.h */ #include <net/xdp_priv.h> #define __MEM_TYPE_MAP(FN) \ FN(PAGE_SHARED) \ FN(PAGE_ORDER0) \ FN(PAGE_POOL) \ FN(XSK_BUFF_POOL) #define __MEM_TYPE_TP_FN(x) \ TRACE_DEFINE_ENUM(MEM_TYPE_##x); #define __MEM_TYPE_SYM_FN(x) \ { MEM_TYPE_##x, #x }, #define __MEM_TYPE_SYM_TAB \ __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } __MEM_TYPE_MAP(__MEM_TYPE_TP_FN) TRACE_EVENT(mem_disconnect, TP_PROTO(const struct xdp_mem_allocator *xa), TP_ARGS(xa), TP_STRUCT__entry( __field(const struct xdp_mem_allocator *, xa) __field(u32, mem_id) __field(u32, mem_type) __field(const void *, allocator) ), TP_fast_assign( __entry->xa = xa; __entry->mem_id = xa->mem.id; __entry->mem_type = xa->mem.type; __entry->allocator = xa->allocator; ), TP_printk("mem_id=%d mem_type=%s allocator=%p", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->allocator ) ); TRACE_EVENT(mem_connect, TP_PROTO(const struct xdp_mem_allocator *xa, const struct xdp_rxq_info *rxq), TP_ARGS(xa, rxq), TP_STRUCT__entry( __field(const struct xdp_mem_allocator *, xa) __field(u32, mem_id) __field(u32, mem_type) __field(const void *, allocator) __field(const struct xdp_rxq_info *, rxq) __field(int, ifindex) ), TP_fast_assign( __entry->xa = xa; __entry->mem_id = xa->mem.id; __entry->mem_type = xa->mem.type; __entry->allocator = xa->allocator; __entry->rxq = rxq; __entry->ifindex = rxq->dev->ifindex; ), TP_printk("mem_id=%d mem_type=%s allocator=%p" " ifindex=%d", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->allocator, __entry->ifindex ) ); TRACE_EVENT(mem_return_failed, TP_PROTO(const struct xdp_mem_info *mem, const struct page *page), TP_ARGS(mem, page), TP_STRUCT__entry( __field(const struct page *, page) __field(u32, 
mem_id) __field(u32, mem_type) ), TP_fast_assign( __entry->page = page; __entry->mem_id = mem->id; __entry->mem_type = mem->type; ), TP_printk("mem_id=%d mem_type=%s page=%p", __entry->mem_id, __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), __entry->page ) ); TRACE_EVENT(bpf_xdp_link_attach_failed, TP_PROTO(const char *msg), TP_ARGS(msg), TP_STRUCT__entry( __string(msg, msg) ), TP_fast_assign( __assign_str(msg); ), TP_printk("errmsg=%s", __get_str(msg)) ); #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h> |
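The __XDP_ACT_MAP() X-macro above is expanded twice: once through __XDP_ACT_TP_FN() to emit TRACE_DEFINE_ENUM() declarations, and once through __XDP_ACT_SYM_FN() to build the { value, name } table consumed by __print_symbolic(). The following stand-alone user-space sketch shows what __XDP_ACT_SYM_TAB resolves to; the enum is re-declared locally for illustration and is not taken from the kernel build.

/* Stand-alone sketch of the X-macro expansion used above; the enum below
 * mirrors the order of enum xdp_action for illustration only.
 */
#include <stdio.h>

enum { XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX, XDP_REDIRECT };

#define __XDP_ACT_MAP(FN) FN(ABORTED) FN(DROP) FN(PASS) FN(TX) FN(REDIRECT)
#define __XDP_ACT_SYM_FN(x) { XDP_##x, #x },

static const struct { int val; const char *name; } xdp_act_syms[] = {
	__XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, NULL }
};

int main(void)
{
	/* Prints "0 ABORTED" ... "4 REDIRECT": the same value-to-name mapping
	 * that __print_symbolic(act, __XDP_ACT_SYM_TAB) renders in trace output. */
	for (int i = 0; xdp_act_syms[i].name; i++)
		printf("%d %s\n", xdp_act_syms[i].val, xdp_act_syms[i].name);
	return 0;
}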
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/posix_acl.h> #include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_dmap.h" #include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" #include "jfs_debug.h" int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int rc = 0; rc = file_write_and_wait_range(file, start, end); if (rc) return rc; inode_lock(inode); if (!(inode->i_state & I_DIRTY_ALL) || (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); inode_unlock(inode); return rc; } rc |= jfs_commit_inode(inode, 1); inode_unlock(inode); return rc ? -EIO : 0; } static int jfs_open(struct inode *inode, struct file *file) { int rc; if ((rc = dquot_file_open(inode, file))) return rc; /* * We attempt to allow only one "active" file open per aggregate * group. Otherwise, appending to files in parallel can cause * fragmentation within the files. * * If the file is empty, it was probably just created and going * to be written to. If it has a size, we'll hold off until the * file is actually grown.
*/ if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE && (inode->i_size == 0)) { struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag == -1) { struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } return 0; } static int jfs_release(struct inode *inode, struct file *file) { struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag != -1) { struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; atomic_dec(&bmap->db_active[ji->active_ag]); ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); return 0; } int jfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int rc; rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (rc) return rc; if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) { rc = dquot_initialize(inode); if (rc) return rc; } if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { rc = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (rc) return rc; } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); rc = inode_newsize_ok(inode, iattr->ia_size); if (rc) return rc; truncate_setsize(inode, iattr->ia_size); jfs_truncate(inode); } setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); return rc; } const struct inode_operations jfs_file_inode_operations = { .listxattr = jfs_listxattr, .setattr = jfs_setattr, .fileattr_get = jfs_fileattr_get, .fileattr_set = jfs_fileattr_set, #ifdef CONFIG_JFS_POSIX_ACL .get_inode_acl = jfs_get_acl, .set_acl = jfs_set_acl, #endif }; const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, .unlocked_ioctl = jfs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; |
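jfs_open() and jfs_release() above pair up to enforce the "one active open per allocation group" policy from the comment: an empty file opened for write claims an AG and bumps its active count, and the release drops the claim. A simplified stand-alone sketch of that pairing follows; the types and names are illustrative only and omit the real bmap locking.

/* Simplified model of the active_ag claim/release pairing; not JFS code. */
#include <stdio.h>

#define NO_AG (-1)

struct toy_inode {
	int active_ag;          /* AG this inode currently "owns", or NO_AG */
	long long size;         /* only empty files claim an AG on open */
};

static int ag_active[8];        /* stands in for bmap->db_active[] */

static void toy_open_for_write(struct toy_inode *ip, int ag_of_inode)
{
	/* Mirrors jfs_open(): only an empty file claims an AG, and only
	 * if it does not already hold one. */
	if (ip->size == 0 && ip->active_ag == NO_AG) {
		ip->active_ag = ag_of_inode;
		ag_active[ag_of_inode]++;
	}
}

static void toy_release(struct toy_inode *ip)
{
	/* Mirrors jfs_release(): drop the claim, if any. */
	if (ip->active_ag != NO_AG) {
		ag_active[ip->active_ag]--;
		ip->active_ag = NO_AG;
	}
}

int main(void)
{
	struct toy_inode ip = { .active_ag = NO_AG, .size = 0 };

	toy_open_for_write(&ip, 3);
	printf("AG 3 active count: %d\n", ag_active[3]);    /* 1 */
	toy_release(&ip);
	printf("AG 3 active count: %d\n", ag_active[3]);    /* 0 */
	return 0;
}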
// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/revoke.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 * * Copyright 2000 Red Hat corp --- All Rights Reserved * * Journal revoke routines for the generic filesystem journaling code; * part of the ext2fs journaling system. * * Revoke is the mechanism used to prevent old log records for deleted * metadata from being replayed on top of newer data using the same * blocks.
The revoke mechanism is used in two separate places: * * + Commit: during commit we write the entire list of the current * transaction's revoked blocks to the journal * * + Recovery: during recovery we record the transaction ID of all * revoked blocks. If there are multiple revoke records in the log * for a single block, only the last one counts, and if there is a log * entry for a block beyond the last revoke, then that log entry still * gets replayed. * * We can get interactions between revokes and new log data within a * single transaction: * * Block is revoked and then journaled: * The desired end result is the journaling of the new block, so we * cancel the revoke before the transaction commits. * * Block is journaled and then revoked: * The revoke must take precedence over the write of the block, so we * need either to cancel the journal entry or to write the revoke * later in the log than the log block. In this case, we choose the * latter: journaling a block cancels any revoke record for that block * in the current transaction, so any revoke for that block in the * transaction must have happened after the block was journaled and so * the revoke must take precedence. * * Block is revoked and then written as data: * The data write is allowed to succeed, but the revoke is _not_ * cancelled. We still need to prevent old log records from * overwriting the new data. We don't even need to clear the revoke * bit here. * * We cache revoke status of a buffer in the current transaction in b_states * bits. As the name says, revokevalid flag indicates that the cached revoke * status of a buffer is valid and we can rely on the cached status. * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up * RevokeValid set, Revoked clear: * buffer has not been revoked, and cancel_revoke * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. * * Locking rules: * We keep two hash tables of revoke records. One hashtable belongs to the * running transaction (is pointed to by journal->j_revoke), the other one * belongs to the committing transaction. Accesses to the second hash table * happen only from the kjournald and no other thread touches this table. Also * journal_switch_revoke_table() which switches which hashtable belongs to the * running and which to the committing transaction is called only from * kjournald. Therefore we need no locks when accessing the hashtable belonging * to the committing transaction. * * All users operating on the hash table belonging to the running transaction * have a handle to the transaction. Therefore they are safe from kjournald * switching hash tables under them. For operations on the lists of entries in * the hash table j_revoke_lock is used. * * Finally, also replay code uses the hash tables but at this moment no one else * can touch them (filesystem isn't mounted yet) and hence no locking is * needed. */ #ifndef __KERNEL__ #include "jfs_user.h" #else #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/init.h> #include <linux/bio.h> #include <linux/log2.h> #include <linux/hash.h> #endif static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the last transaction to revoke this block. 
*/ struct jbd2_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ unsigned long long blocknr; }; /* The revoke table is just a simple hash table of revoke records. */ struct jbd2_revoke_table_s { /* It is conceivable that we might want a larger hash table * for recovery. Must be a power of two. */ int hash_size; int hash_shift; struct list_head *hash_table; }; #ifdef __KERNEL__ static void write_one_revoke_record(transaction_t *, struct list_head *, struct buffer_head **, int *, struct jbd2_revoke_record_s *); static void flush_descriptor(journal_t *, struct buffer_head *, int); #endif /* Utility functions to maintain the revoke table */ static inline int hash(journal_t *journal, unsigned long long block) { return hash_64(block, journal->j_revoke->hash_shift); } static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, tid_t seq) { struct list_head *hash_list; struct jbd2_revoke_record_s *record; gfp_t gfp_mask = GFP_NOFS; if (journal_oom_retry) gfp_mask |= __GFP_NOFAIL; record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask); if (!record) return -ENOMEM; record->sequence = seq; record->blocknr = blocknr; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; spin_lock(&journal->j_revoke_lock); list_add(&record->hash, hash_list); spin_unlock(&journal->j_revoke_lock); return 0; } /* Find a revoke record in the journal's hash table. */ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, unsigned long long blocknr) { struct list_head *hash_list; struct jbd2_revoke_record_s *record; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; spin_lock(&journal->j_revoke_lock); record = (struct jbd2_revoke_record_s *) hash_list->next; while (&(record->hash) != hash_list) { if (record->blocknr == blocknr) { spin_unlock(&journal->j_revoke_lock); return record; } record = (struct jbd2_revoke_record_s *) record->hash.next; } spin_unlock(&journal->j_revoke_lock); return NULL; } void jbd2_journal_destroy_revoke_record_cache(void) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; } void jbd2_journal_destroy_revoke_table_cache(void) { kmem_cache_destroy(jbd2_revoke_table_cache); jbd2_revoke_table_cache = NULL; } int __init jbd2_journal_init_revoke_record_cache(void) { J_ASSERT(!jbd2_revoke_record_cache); jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); if (!jbd2_revoke_record_cache) { pr_emerg("JBD2: failed to create revoke_record cache\n"); return -ENOMEM; } return 0; } int __init jbd2_journal_init_revoke_table_cache(void) { J_ASSERT(!jbd2_revoke_table_cache); jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, SLAB_TEMPORARY); if (!jbd2_revoke_table_cache) { pr_emerg("JBD2: failed to create revoke_table cache\n"); return -ENOMEM; } return 0; } static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) { int shift = 0; int tmp = hash_size; struct jbd2_revoke_table_s *table; table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); if (!table) goto out; while((tmp >>= 1UL) != 0UL) shift++; table->hash_size = hash_size; table->hash_shift = shift; table->hash_table = kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL); if (!table->hash_table) { kmem_cache_free(jbd2_revoke_table_cache, table); table = NULL; goto out; } for (tmp = 0; tmp < hash_size; tmp++) INIT_LIST_HEAD(&table->hash_table[tmp]); out: return table; } static void jbd2_journal_destroy_revoke_table(struct 
jbd2_revoke_table_s *table) { int i; struct list_head *hash_list; for (i = 0; i < table->hash_size; i++) { hash_list = &table->hash_table[i]; J_ASSERT(list_empty(hash_list)); } kfree(table->hash_table); kmem_cache_free(jbd2_revoke_table_cache, table); } /* Initialise the revoke table for a given journal to a given size. */ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) { J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); if (!journal->j_revoke_table[0]) goto fail0; journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); if (!journal->j_revoke_table[1]) goto fail1; journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; fail1: jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); journal->j_revoke_table[0] = NULL; fail0: return -ENOMEM; } /* Destroy a journal's revoke table. The table must already be empty! */ void jbd2_journal_destroy_revoke(journal_t *journal) { journal->j_revoke = NULL; if (journal->j_revoke_table[0]) jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); if (journal->j_revoke_table[1]) jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); } #ifdef __KERNEL__ /* * jbd2_journal_revoke: revoke a given buffer_head from the journal. This * prevents the block from being replayed during recovery if we take a * crash after this current transaction commits. Any subsequent * metadata writes of the buffer in this transaction cancel the * revoke. * * Note that this call may block --- it is up to the caller to make * sure that there are no further calls to journal_write_metadata * before the revoke is complete. In ext3, this implies calling the * revoke before clearing the block bitmap when we are deleting * metadata. * * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a * parameter, but does _not_ forget the buffer_head if the bh was only * found implicitly. * * bh_in may not be a journalled buffer - it may have come off * the hash tables without an attached journal_head. * * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count * by one. */ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; journal_t *journal; struct block_device *bdev; int err; might_sleep(); if (bh_in) BUFFER_TRACE(bh_in, "enter"); journal = handle->h_transaction->t_journal; if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){ J_ASSERT (!"Cannot set revoke feature!"); return -EINVAL; } bdev = journal->j_fs_dev; bh = bh_in; if (!bh) { bh = __find_get_block(bdev, blocknr, journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } #ifdef JBD2_EXPENSIVE_CHECKING else { struct buffer_head *bh2; /* If there is a different buffer_head lying around in * memory anywhere... */ bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) /* ...then it better be revoked too, * since it's illegal to create a revoke * record against a buffer_head which is * not marked revoked --- that would * risk missing a subsequent revoke * cancel. 
*/ J_ASSERT_BH(bh2, buffer_revoked(bh2)); put_bh(bh2); } } #endif if (WARN_ON_ONCE(handle->h_revoke_credits <= 0)) { if (!bh_in) brelse(bh); return -EIO; } /* We really ought not ever to revoke twice in a row without first having the revoke cancelled: it's illegal to free a block twice without allocating it in between! */ if (bh) { if (!J_EXPECT_BH(bh, !buffer_revoked(bh), "inconsistent data on disk")) { if (!bh_in) brelse(bh); return -EIO; } set_buffer_revoked(bh); set_buffer_revokevalid(bh); if (bh_in) { BUFFER_TRACE(bh_in, "call jbd2_journal_forget"); jbd2_journal_forget(handle, bh_in); } else { BUFFER_TRACE(bh, "call brelse"); __brelse(bh); } } handle->h_revoke_credits--; jbd2_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in); err = insert_revoke_hash(journal, blocknr, handle->h_transaction->t_tid); BUFFER_TRACE(bh_in, "exit"); return err; } /* * Cancel an outstanding revoke. For use only internally by the * journaling code (called from jbd2_journal_get_write_access). * * We trust buffer_revoked() on the buffer if the buffer is already * being journaled: if there is no revoke pending on the buffer, then we * don't do anything here. * * This would break if it were possible for a buffer to be revoked and * discarded, and then reallocated within the same transaction. In such * a case we would have lost the revoked bit, but when we arrived here * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. */ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { struct jbd2_revoke_record_s *record; journal_t *journal = handle->h_transaction->t_journal; int need_cancel; int did_revoke = 0; /* akpm: debug */ struct buffer_head *bh = jh2bh(jh); jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh); /* Is the existing Revoke bit valid? If so, we trust it, and * only perform the full cancel if the revoke bit is set. If * not, we can't trust the revoke bit, and we need to do the * full search for a revoke record. */ if (test_set_buffer_revokevalid(bh)) { need_cancel = test_clear_buffer_revoked(bh); } else { need_cancel = 1; clear_buffer_revoked(bh); } if (need_cancel) { record = find_revoke_record(journal, bh->b_blocknr); if (record) { jbd2_debug(4, "cancelled existing revoke on " "blocknr %llu\n", (unsigned long long)bh->b_blocknr); spin_lock(&journal->j_revoke_lock); list_del(&record->hash); spin_unlock(&journal->j_revoke_lock); kmem_cache_free(jbd2_revoke_record_cache, record); did_revoke = 1; } } #ifdef JBD2_EXPENSIVE_CHECKING /* There better not be one left behind by now! */ record = find_revoke_record(journal, bh->b_blocknr); J_ASSERT_JH(jh, record == NULL); #endif /* Finally, have we just cleared revoke on an unhashed * buffer_head? If so, we'd better make sure we clear the * revoked status on any hashed alias too, otherwise the revoke * state machine will get very upset later on. */ if (need_cancel) { struct buffer_head *bh2; bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); __brelse(bh2); } } return did_revoke; } /* * journal_clear_revoked_flag clears revoked flag of buffers in * revoke table to reflect there is no revoked buffers in the next * transaction which is going to be started. 
*/ void jbd2_clear_buffer_revoked_flags(journal_t *journal) { struct jbd2_revoke_table_s *revoke = journal->j_revoke; int i = 0; for (i = 0; i < revoke->hash_size; i++) { struct list_head *hash_list; struct list_head *list_entry; hash_list = &revoke->hash_table[i]; list_for_each(list_entry, hash_list) { struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; bh = __find_get_block(journal->j_fs_dev, record->blocknr, journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh); } } } } /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz */ void jbd2_journal_switch_revoke_table(journal_t *journal) { int i; if (journal->j_revoke == journal->j_revoke_table[0]) journal->j_revoke = journal->j_revoke_table[1]; else journal->j_revoke = journal->j_revoke_table[0]; for (i = 0; i < journal->j_revoke->hash_size; i++) INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); } /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. */ void jbd2_journal_write_revoke_records(transaction_t *transaction, struct list_head *log_bufs) { journal_t *journal = transaction->t_journal; struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; int i, offset, count; descriptor = NULL; offset = 0; count = 0; /* select revoke table for committing transaction */ revoke = journal->j_revoke == journal->j_revoke_table[0] ? journal->j_revoke_table[1] : journal->j_revoke_table[0]; for (i = 0; i < revoke->hash_size; i++) { hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; write_one_revoke_record(transaction, log_bufs, &descriptor, &offset, record); count++; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } if (descriptor) flush_descriptor(journal, descriptor, offset); jbd2_debug(1, "Wrote %d revoke records\n", count); } /* * Write out one revoke record. We need to create a new descriptor * block if the old one is full or if we have not already created one. */ static void write_one_revoke_record(transaction_t *transaction, struct list_head *log_bufs, struct buffer_head **descriptorp, int *offsetp, struct jbd2_revoke_record_s *record) { journal_t *journal = transaction->t_journal; int csum_size = 0; struct buffer_head *descriptor; int sz, offset; /* If we are already aborting, this all becomes a noop. We still need to go round the loop in jbd2_journal_write_revoke_records in order to free all of the revoke records: only the IO to the journal is omitted. */ if (is_journal_aborted(journal)) return; descriptor = *descriptorp; offset = *offsetp; /* Do we need to leave space at the end for a checksum? 
*/ if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); if (jbd2_has_feature_64bit(journal)) sz = 8; else sz = 4; /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset + sz > journal->j_blocksize - csum_size) { flush_descriptor(journal, descriptor, offset); descriptor = NULL; } } if (!descriptor) { descriptor = jbd2_journal_get_descriptor_buffer(transaction, JBD2_REVOKE_BLOCK); if (!descriptor) return; /* Record it so that we can wait for IO completion later */ BUFFER_TRACE(descriptor, "file in log_bufs"); jbd2_file_log_bh(log_bufs, descriptor); offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } if (jbd2_has_feature_64bit(journal)) * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); else * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); offset += sz; *offsetp = offset; } /* * Flush a revoke descriptor out to the journal. If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to * be waited for during commit, so it has to go onto the appropriate * journal buffer list. */ static void flush_descriptor(journal_t *journal, struct buffer_head *descriptor, int offset) { jbd2_journal_revoke_header_t *header; if (is_journal_aborted(journal)) return; header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); jbd2_descriptor_block_csum_set(journal, descriptor); set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); write_dirty_buffer(descriptor, REQ_SYNC); } #endif /* * Revoke support for recovery. * * Recovery needs to be able to: * * record all revoke records, including the tid of the latest instance * of each revoke in the journal * * check whether a given block in a given transaction should be replayed * (ie. has not been revoked by a revoke record in that or a subsequent * transaction) * * empty the revoke table after recovery. */ /* * First, setting revoke records. We create a new revoke record for * every block ever revoked in the log as we scan it for recovery, and * we update the existing records if we find multiple revokes for a * single block. */ int jbd2_journal_set_revoke(journal_t *journal, unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; record = find_revoke_record(journal, blocknr); if (record) { /* If we have multiple occurrences, only record the * latest sequence number in the hashed record */ if (tid_gt(sequence, record->sequence)) record->sequence = sequence; return 0; } return insert_revoke_hash(journal, blocknr, sequence); } /* * Test revoke records. For a given block referenced in the log, has * that block been revoked? A revoke record with a given transaction * sequence number revokes all blocks in that transaction and earlier * ones, but later transactions still need replayed. */ int jbd2_journal_test_revoke(journal_t *journal, unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; record = find_revoke_record(journal, blocknr); if (!record) return 0; if (tid_gt(sequence, record->sequence)) return 0; return 1; } /* * Finally, once recovery is over, we need to clear the revoke table so * that it can be reused by the running filesystem. 
*/ void jbd2_journal_clear_revoke(journal_t *journal) { int i; struct list_head *hash_list; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; revoke = journal->j_revoke; for (i = 0; i < revoke->hash_size; i++) { hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s*) hash_list->next; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } } |
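The recovery rules implemented above are: jbd2_journal_set_revoke() keeps only the latest revoking transaction for a block, and jbd2_journal_test_revoke() suppresses replay only for log entries from that transaction or earlier. The following stand-alone sketch demonstrates those two rules with a single record standing in for the real per-journal hash table; it uses a plain comparison instead of the wraparound-safe tid_gt() and is illustrative only.

/* Stand-alone sketch of the recovery-side revoke semantics described above. */
#include <stdio.h>
#include <stdbool.h>

typedef unsigned int tid_t;

struct toy_revoke {
	bool valid;
	unsigned long long blocknr;
	tid_t sequence;
};

static void toy_set_revoke(struct toy_revoke *r, unsigned long long blocknr, tid_t seq)
{
	if (r->valid && r->blocknr == blocknr) {
		if (seq > r->sequence)          /* keep only the latest revoke */
			r->sequence = seq;
		return;
	}
	r->valid = true;
	r->blocknr = blocknr;
	r->sequence = seq;
}

static bool toy_test_revoke(const struct toy_revoke *r, unsigned long long blocknr, tid_t seq)
{
	if (!r->valid || r->blocknr != blocknr)
		return false;                   /* never revoked: replay it */
	return seq <= r->sequence;              /* revoked in this or a later tid */
}

int main(void)
{
	struct toy_revoke r = { 0 };

	toy_set_revoke(&r, 1234, 5);            /* block 1234 revoked in tid 5 */
	toy_set_revoke(&r, 1234, 7);            /* later revoke supersedes it */

	printf("replay tid 6 entry? %s\n", toy_test_revoke(&r, 1234, 6) ? "no" : "yes");  /* no */
	printf("replay tid 8 entry? %s\n", toy_test_revoke(&r, 1234, 8) ? "no" : "yes");  /* yes */
	return 0;
}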
/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2018 Christoph Hellwig. * * DMA operations that map physical memory directly without using an IOMMU. */ #ifndef _KERNEL_DMA_DIRECT_H #define _KERNEL_DMA_DIRECT_H #include <linux/dma-direct.h> #include <linux/memremap.h> int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_direct_can_mmap(struct device *dev); int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); bool dma_direct_all_ram_mapped(struct device *dev); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir); #else static inline void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { } #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ defined(CONFIG_SWIOTLB) void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir); #else static inline void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { } static inline void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { } #endif static inline void dma_direct_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(dev, addr); swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); } static inline void dma_direct_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(dev, addr); if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_cpu(paddr, size, dir); arch_sync_dma_for_cpu_all(); } swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); } static inline dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); if (is_swiotlb_force_bounce(dev)) { if (is_pci_p2pdma_page(page)) return DMA_MAPPING_ERROR; return swiotlb_map(dev, phys, size, dir, attrs); } if (unlikely(!dma_capable(dev, dma_addr, size, true)) || dma_kmalloc_needs_bounce(dev, size,
dir)) { if (is_pci_p2pdma_page(page)) return DMA_MAPPING_ERROR; if (is_swiotlb_active(dev)) return swiotlb_map(dev, phys, size, dir, attrs); dev_WARN_ONCE(dev, 1, "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); return DMA_MAPPING_ERROR; } if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) arch_sync_dma_for_device(phys, size, dir); return dma_addr; } static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t phys = dma_to_phys(dev, addr); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ |
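dma_direct_map_page() above is reached through the generic DMA API rather than being called by drivers directly. The sketch below shows the consumer side, in particular the mandatory dma_mapping_error() check that catches the DMA_MAPPING_ERROR return paths seen above; "dev" and "page" are assumed to come from the surrounding driver context, and the helper names are hypothetical.

/* Sketch of how a driver consumes the direct-mapping path via the generic
 * DMA API; this is not part of kernel/dma/direct.h itself.
 */
#include <linux/dma-mapping.h>

static int toy_map_one_page(struct device *dev, struct page *page,
			    size_t len, dma_addr_t *out)
{
	dma_addr_t addr;

	/* Ends up in dma_direct_map_page() when the device uses the direct
	 * DMA ops; may bounce through swiotlb or fail as shown above. */
	addr = dma_map_page(dev, page, 0, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;

	*out = addr;
	return 0;
}

static void toy_unmap_one_page(struct device *dev, dma_addr_t addr, size_t len)
{
	/* Pairs with the mapping above; mirrors dma_direct_unmap_page(). */
	dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
}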
// SPDX-License-Identifier: GPL-2.0+ /* * comedi/comedi_fops.c * comedi kernel module * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1997-2007 David A. Schleef <ds@schleef.org> * compat ioctls: * Author: Ian Abbott, MEV Ltd. <abbotti@mev.co.uk> * Copyright (C) 2007 MEV Ltd. <http://www.mev.co.uk/> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/fcntl.h> #include <linux/delay.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/comedi/comedidev.h> #include <linux/cdev.h> #include <linux/io.h> #include <linux/uaccess.h> #include <linux/compat.h> #include "comedi_internal.h" /* * comedi_subdevice "runflags" * COMEDI_SRF_RT: DEPRECATED: command is running real-time * COMEDI_SRF_ERROR: indicates an COMEDI_CB_ERROR event has occurred * since the last command was started * COMEDI_SRF_RUNNING: command is running * COMEDI_SRF_FREE_SPRIV: free s->private on detach * * COMEDI_SRF_BUSY_MASK: runflags that indicate the subdevice is "busy" */ #define COMEDI_SRF_RT BIT(1) #define COMEDI_SRF_ERROR BIT(2) #define COMEDI_SRF_RUNNING BIT(27) #define COMEDI_SRF_FREE_SPRIV BIT(31) #define COMEDI_SRF_BUSY_MASK (COMEDI_SRF_ERROR | COMEDI_SRF_RUNNING) /** * struct comedi_file - Per-file private data for COMEDI device * @dev: COMEDI device. * @read_subdev: Current "read" subdevice. * @write_subdev: Current "write" subdevice. * @last_detach_count: Last known detach count.
* @last_attached: Last known attached/detached state. */ struct comedi_file { struct comedi_device *dev; struct comedi_subdevice *read_subdev; struct comedi_subdevice *write_subdev; unsigned int last_detach_count; unsigned int last_attached:1; }; #define COMEDI_NUM_MINORS 0x100 #define COMEDI_NUM_SUBDEVICE_MINORS \ (COMEDI_NUM_MINORS - COMEDI_NUM_BOARD_MINORS) static unsigned short comedi_num_legacy_minors; module_param(comedi_num_legacy_minors, ushort, 0444); MODULE_PARM_DESC(comedi_num_legacy_minors, "number of comedi minor devices to reserve for non-auto-configured devices (default 0)" ); unsigned int comedi_default_buf_size_kb = CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB; module_param(comedi_default_buf_size_kb, uint, 0644); MODULE_PARM_DESC(comedi_default_buf_size_kb, "default asynchronous buffer size in KiB (default " __MODULE_STRING(CONFIG_COMEDI_DEFAULT_BUF_SIZE_KB) ")"); unsigned int comedi_default_buf_maxsize_kb = CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB; module_param(comedi_default_buf_maxsize_kb, uint, 0644); MODULE_PARM_DESC(comedi_default_buf_maxsize_kb, "default maximum size of asynchronous buffer in KiB (default " __MODULE_STRING(CONFIG_COMEDI_DEFAULT_BUF_MAXSIZE_KB) ")"); static DEFINE_MUTEX(comedi_board_minor_table_lock); static struct comedi_device *comedi_board_minor_table[COMEDI_NUM_BOARD_MINORS]; static DEFINE_MUTEX(comedi_subdevice_minor_table_lock); /* Note: indexed by minor - COMEDI_NUM_BOARD_MINORS. */ static struct comedi_subdevice *comedi_subdevice_minor_table[COMEDI_NUM_SUBDEVICE_MINORS]; static struct cdev comedi_cdev; static void comedi_device_init(struct comedi_device *dev) { kref_init(&dev->refcount); spin_lock_init(&dev->spinlock); mutex_init(&dev->mutex); init_rwsem(&dev->attach_lock); dev->minor = -1; } static void comedi_dev_kref_release(struct kref *kref) { struct comedi_device *dev = container_of(kref, struct comedi_device, refcount); mutex_destroy(&dev->mutex); put_device(dev->class_dev); kfree(dev); } /** * comedi_dev_put() - Release a use of a COMEDI device * @dev: COMEDI device. * * Must be called when a user of a COMEDI device is finished with it. * When the last user of the COMEDI device calls this function, the * COMEDI device is destroyed. * * Return: 1 if the COMEDI device is destroyed by this call or @dev is * NULL, otherwise return 0. Callers must not assume the COMEDI * device is still valid if this function returns 0. 
*/ int comedi_dev_put(struct comedi_device *dev) { if (dev) return kref_put(&dev->refcount, comedi_dev_kref_release); return 1; } EXPORT_SYMBOL_GPL(comedi_dev_put); static struct comedi_device *comedi_dev_get(struct comedi_device *dev) { if (dev) kref_get(&dev->refcount); return dev; } static void comedi_device_cleanup(struct comedi_device *dev) { struct module *driver_module = NULL; if (!dev) return; mutex_lock(&dev->mutex); if (dev->attached) driver_module = dev->driver->module; comedi_device_detach(dev); if (driver_module && dev->use_count) module_put(driver_module); mutex_unlock(&dev->mutex); } static bool comedi_clear_board_dev(struct comedi_device *dev) { unsigned int i = dev->minor; bool cleared = false; lockdep_assert_held(&dev->mutex); mutex_lock(&comedi_board_minor_table_lock); if (dev == comedi_board_minor_table[i]) { comedi_board_minor_table[i] = NULL; cleared = true; } mutex_unlock(&comedi_board_minor_table_lock); return cleared; } static struct comedi_device *comedi_clear_board_minor(unsigned int minor) { struct comedi_device *dev; mutex_lock(&comedi_board_minor_table_lock); dev = comedi_board_minor_table[minor]; comedi_board_minor_table[minor] = NULL; mutex_unlock(&comedi_board_minor_table_lock); return dev; } static struct comedi_subdevice * comedi_subdevice_from_minor(const struct comedi_device *dev, unsigned int minor) { struct comedi_subdevice *s; unsigned int i = minor - COMEDI_NUM_BOARD_MINORS; mutex_lock(&comedi_subdevice_minor_table_lock); s = comedi_subdevice_minor_table[i]; if (s && s->device != dev) s = NULL; mutex_unlock(&comedi_subdevice_minor_table_lock); return s; } static struct comedi_device *comedi_dev_get_from_board_minor(unsigned int minor) { struct comedi_device *dev; mutex_lock(&comedi_board_minor_table_lock); dev = comedi_dev_get(comedi_board_minor_table[minor]); mutex_unlock(&comedi_board_minor_table_lock); return dev; } static struct comedi_device * comedi_dev_get_from_subdevice_minor(unsigned int minor) { struct comedi_device *dev; struct comedi_subdevice *s; unsigned int i = minor - COMEDI_NUM_BOARD_MINORS; mutex_lock(&comedi_subdevice_minor_table_lock); s = comedi_subdevice_minor_table[i]; dev = comedi_dev_get(s ? s->device : NULL); mutex_unlock(&comedi_subdevice_minor_table_lock); return dev; } /** * comedi_dev_get_from_minor() - Get COMEDI device by minor device number * @minor: Minor device number. * * Finds the COMEDI device associated with the minor device number, if any, * and increments its reference count. The COMEDI device is prevented from * being freed until a matching call is made to comedi_dev_put(). * * Return: A pointer to the COMEDI device if it exists, with its usage * reference incremented. Return NULL if no COMEDI device exists with the * specified minor device number. 
*/ struct comedi_device *comedi_dev_get_from_minor(unsigned int minor) { if (minor < COMEDI_NUM_BOARD_MINORS) return comedi_dev_get_from_board_minor(minor); return comedi_dev_get_from_subdevice_minor(minor); } EXPORT_SYMBOL_GPL(comedi_dev_get_from_minor); static struct comedi_subdevice * comedi_read_subdevice(const struct comedi_device *dev, unsigned int minor) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (minor >= COMEDI_NUM_BOARD_MINORS) { s = comedi_subdevice_from_minor(dev, minor); if (!s || (s->subdev_flags & SDF_CMD_READ)) return s; } return dev->read_subdev; } static struct comedi_subdevice * comedi_write_subdevice(const struct comedi_device *dev, unsigned int minor) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (minor >= COMEDI_NUM_BOARD_MINORS) { s = comedi_subdevice_from_minor(dev, minor); if (!s || (s->subdev_flags & SDF_CMD_WRITE)) return s; } return dev->write_subdev; } static void comedi_file_reset(struct file *file) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_subdevice *s, *read_s, *write_s; unsigned int minor = iminor(file_inode(file)); read_s = dev->read_subdev; write_s = dev->write_subdev; if (minor >= COMEDI_NUM_BOARD_MINORS) { s = comedi_subdevice_from_minor(dev, minor); if (!s || s->subdev_flags & SDF_CMD_READ) read_s = s; if (!s || s->subdev_flags & SDF_CMD_WRITE) write_s = s; } cfp->last_attached = dev->attached; cfp->last_detach_count = dev->detach_count; WRITE_ONCE(cfp->read_subdev, read_s); WRITE_ONCE(cfp->write_subdev, write_s); } static void comedi_file_check(struct file *file) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; if (cfp->last_attached != dev->attached || cfp->last_detach_count != dev->detach_count) comedi_file_reset(file); } static struct comedi_subdevice *comedi_file_read_subdevice(struct file *file) { struct comedi_file *cfp = file->private_data; comedi_file_check(file); return READ_ONCE(cfp->read_subdev); } static struct comedi_subdevice *comedi_file_write_subdevice(struct file *file) { struct comedi_file *cfp = file->private_data; comedi_file_check(file); return READ_ONCE(cfp->write_subdev); } static int resize_async_buffer(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int new_size) { struct comedi_async *async = s->async; int retval; lockdep_assert_held(&dev->mutex); if (new_size > async->max_bufsize) return -EPERM; if (s->busy) { dev_dbg(dev->class_dev, "subdevice is busy, cannot resize buffer\n"); return -EBUSY; } if (comedi_buf_is_mmapped(s)) { dev_dbg(dev->class_dev, "subdevice is mmapped, cannot resize buffer\n"); return -EBUSY; } /* make sure buffer is an integral number of pages (we round up) */ new_size = (new_size + PAGE_SIZE - 1) & PAGE_MASK; retval = comedi_buf_alloc(dev, s, new_size); if (retval < 0) return retval; if (s->buf_change) { retval = s->buf_change(dev, s); if (retval < 0) return retval; } dev_dbg(dev->class_dev, "subd %d buffer resized to %i bytes\n", s->index, async->prealloc_bufsz); return 0; } /* sysfs attribute files */ static ssize_t max_read_buffer_kb_show(struct device *csdev, struct device_attribute *attr, char *buf) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size = 0; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_read_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_READ) && s->async) size = s->async->max_bufsize / 1024; 
mutex_unlock(&dev->mutex); comedi_dev_put(dev); return sysfs_emit(buf, "%u\n", size); } static ssize_t max_read_buffer_kb_store(struct device *csdev, struct device_attribute *attr, const char *buf, size_t count) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size; int err; err = kstrtouint(buf, 10, &size); if (err) return err; if (size > (UINT_MAX / 1024)) return -EINVAL; size *= 1024; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_read_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_READ) && s->async) s->async->max_bufsize = size; else err = -EINVAL; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return err ? err : count; } static DEVICE_ATTR_RW(max_read_buffer_kb); static ssize_t read_buffer_kb_show(struct device *csdev, struct device_attribute *attr, char *buf) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size = 0; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_read_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_READ) && s->async) size = s->async->prealloc_bufsz / 1024; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return sysfs_emit(buf, "%u\n", size); } static ssize_t read_buffer_kb_store(struct device *csdev, struct device_attribute *attr, const char *buf, size_t count) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size; int err; err = kstrtouint(buf, 10, &size); if (err) return err; if (size > (UINT_MAX / 1024)) return -EINVAL; size *= 1024; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_read_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_READ) && s->async) err = resize_async_buffer(dev, s, size); else err = -EINVAL; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return err ? err : count; } static DEVICE_ATTR_RW(read_buffer_kb); static ssize_t max_write_buffer_kb_show(struct device *csdev, struct device_attribute *attr, char *buf) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size = 0; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_write_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_WRITE) && s->async) size = s->async->max_bufsize / 1024; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return sysfs_emit(buf, "%u\n", size); } static ssize_t max_write_buffer_kb_store(struct device *csdev, struct device_attribute *attr, const char *buf, size_t count) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size; int err; err = kstrtouint(buf, 10, &size); if (err) return err; if (size > (UINT_MAX / 1024)) return -EINVAL; size *= 1024; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_write_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_WRITE) && s->async) s->async->max_bufsize = size; else err = -EINVAL; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return err ? 
err : count; } static DEVICE_ATTR_RW(max_write_buffer_kb); static ssize_t write_buffer_kb_show(struct device *csdev, struct device_attribute *attr, char *buf) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size = 0; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_write_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_WRITE) && s->async) size = s->async->prealloc_bufsz / 1024; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return sysfs_emit(buf, "%u\n", size); } static ssize_t write_buffer_kb_store(struct device *csdev, struct device_attribute *attr, const char *buf, size_t count) { unsigned int minor = MINOR(csdev->devt); struct comedi_device *dev; struct comedi_subdevice *s; unsigned int size; int err; err = kstrtouint(buf, 10, &size); if (err) return err; if (size > (UINT_MAX / 1024)) return -EINVAL; size *= 1024; dev = comedi_dev_get_from_minor(minor); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); s = comedi_write_subdevice(dev, minor); if (s && (s->subdev_flags & SDF_CMD_WRITE) && s->async) err = resize_async_buffer(dev, s, size); else err = -EINVAL; mutex_unlock(&dev->mutex); comedi_dev_put(dev); return err ? err : count; } static DEVICE_ATTR_RW(write_buffer_kb); static struct attribute *comedi_dev_attrs[] = { &dev_attr_max_read_buffer_kb.attr, &dev_attr_read_buffer_kb.attr, &dev_attr_max_write_buffer_kb.attr, &dev_attr_write_buffer_kb.attr, NULL, }; ATTRIBUTE_GROUPS(comedi_dev); static const struct class comedi_class = { .name = "comedi", .dev_groups = comedi_dev_groups, }; static void comedi_free_board_dev(struct comedi_device *dev) { if (dev) { comedi_device_cleanup(dev); if (dev->class_dev) { device_destroy(&comedi_class, MKDEV(COMEDI_MAJOR, dev->minor)); } comedi_dev_put(dev); } } static void __comedi_clear_subdevice_runflags(struct comedi_subdevice *s, unsigned int bits) { s->runflags &= ~bits; } static void __comedi_set_subdevice_runflags(struct comedi_subdevice *s, unsigned int bits) { s->runflags |= bits; } static void comedi_update_subdevice_runflags(struct comedi_subdevice *s, unsigned int mask, unsigned int bits) { unsigned long flags; spin_lock_irqsave(&s->spin_lock, flags); __comedi_clear_subdevice_runflags(s, mask); __comedi_set_subdevice_runflags(s, bits & mask); spin_unlock_irqrestore(&s->spin_lock, flags); } static unsigned int __comedi_get_subdevice_runflags(struct comedi_subdevice *s) { return s->runflags; } static unsigned int comedi_get_subdevice_runflags(struct comedi_subdevice *s) { unsigned long flags; unsigned int runflags; spin_lock_irqsave(&s->spin_lock, flags); runflags = __comedi_get_subdevice_runflags(s); spin_unlock_irqrestore(&s->spin_lock, flags); return runflags; } static bool comedi_is_runflags_running(unsigned int runflags) { return runflags & COMEDI_SRF_RUNNING; } static bool comedi_is_runflags_in_error(unsigned int runflags) { return runflags & COMEDI_SRF_ERROR; } /** * comedi_is_subdevice_running() - Check if async command running on subdevice * @s: COMEDI subdevice. * * Return: %true if an asynchronous COMEDI command is active on the * subdevice, else %false. 
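 *
 * A usage sketch (the interrupt-handler context and return value are
 * illustrative, not prescribed by this API): a low-level driver may use
 * this to ignore interrupts that arrive after its command has stopped:
 *
 *	if (!comedi_is_subdevice_running(s))
 *		return IRQ_HANDLED;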
*/ bool comedi_is_subdevice_running(struct comedi_subdevice *s) { unsigned int runflags = comedi_get_subdevice_runflags(s); return comedi_is_runflags_running(runflags); } EXPORT_SYMBOL_GPL(comedi_is_subdevice_running); static bool __comedi_is_subdevice_running(struct comedi_subdevice *s) { unsigned int runflags = __comedi_get_subdevice_runflags(s); return comedi_is_runflags_running(runflags); } bool comedi_can_auto_free_spriv(struct comedi_subdevice *s) { unsigned int runflags = __comedi_get_subdevice_runflags(s); return runflags & COMEDI_SRF_FREE_SPRIV; } /** * comedi_set_spriv_auto_free() - Mark subdevice private data as freeable * @s: COMEDI subdevice. * * Mark the subdevice as having a pointer to private data that can be * automatically freed when the COMEDI device is detached from the low-level * driver. */ void comedi_set_spriv_auto_free(struct comedi_subdevice *s) { __comedi_set_subdevice_runflags(s, COMEDI_SRF_FREE_SPRIV); } EXPORT_SYMBOL_GPL(comedi_set_spriv_auto_free); /** * comedi_alloc_spriv - Allocate memory for the subdevice private data * @s: COMEDI subdevice. * @size: Size of the memory to allocate. * * Allocate memory for the subdevice private data and point @s->private * to it. The memory will be freed automatically when the COMEDI device * is detached from the low-level driver. * * Return: A pointer to the allocated memory @s->private on success. * Return NULL on failure. */ void *comedi_alloc_spriv(struct comedi_subdevice *s, size_t size) { s->private = kzalloc(size, GFP_KERNEL); if (s->private) comedi_set_spriv_auto_free(s); return s->private; } EXPORT_SYMBOL_GPL(comedi_alloc_spriv); /* * This function restores a subdevice to an idle state. */ static void do_become_nonbusy(struct comedi_device *dev, struct comedi_subdevice *s) { struct comedi_async *async = s->async; lockdep_assert_held(&dev->mutex); comedi_update_subdevice_runflags(s, COMEDI_SRF_RUNNING, 0); if (async) { comedi_buf_reset(s); async->inttrig = NULL; kfree(async->cmd.chanlist); async->cmd.chanlist = NULL; s->busy = NULL; wake_up_interruptible_all(&async->wait_head); } else { dev_err(dev->class_dev, "BUG: (?) 
%s called with async=NULL\n", __func__); s->busy = NULL; } } static int do_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { int ret = 0; lockdep_assert_held(&dev->mutex); if (comedi_is_subdevice_running(s) && s->cancel) ret = s->cancel(dev, s); do_become_nonbusy(dev, s); return ret; } void comedi_device_cancel_all(struct comedi_device *dev) { struct comedi_subdevice *s; int i; lockdep_assert_held(&dev->mutex); if (!dev->attached) return; for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; if (s->async) do_cancel(dev, s); } } static int is_device_busy(struct comedi_device *dev) { struct comedi_subdevice *s; int i; lockdep_assert_held(&dev->mutex); if (!dev->attached) return 0; for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; if (s->busy) return 1; if (s->async && comedi_buf_is_mmapped(s)) return 1; } return 0; } /* * COMEDI_DEVCONFIG ioctl * attaches (and configures) or detaches a legacy device * * arg: * pointer to comedi_devconfig structure (NULL if detaching) * * reads: * comedi_devconfig structure (if attaching) * * writes: * nothing */ static int do_devconfig_ioctl(struct comedi_device *dev, struct comedi_devconfig __user *arg) { struct comedi_devconfig it; lockdep_assert_held(&dev->mutex); if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!arg) { if (is_device_busy(dev)) return -EBUSY; if (dev->attached) { struct module *driver_module = dev->driver->module; comedi_device_detach(dev); module_put(driver_module); } return 0; } if (copy_from_user(&it, arg, sizeof(it))) return -EFAULT; it.board_name[COMEDI_NAMELEN - 1] = 0; if (it.options[COMEDI_DEVCONF_AUX_DATA_LENGTH]) { dev_warn(dev->class_dev, "comedi_config --init_data is deprecated\n"); return -EINVAL; } if (dev->minor >= comedi_num_legacy_minors) /* don't re-use dynamically allocated comedi devices */ return -EBUSY; /* This increments the driver module count on success. 
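 * The matching module_put() is done in the detach branch of this
 * function (arg == NULL) once the device is unconfigured again.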
*/ return comedi_device_attach(dev, &it); } /* * COMEDI_BUFCONFIG ioctl * buffer configuration * * arg: * pointer to comedi_bufconfig structure * * reads: * comedi_bufconfig structure * * writes: * modified comedi_bufconfig structure */ static int do_bufconfig_ioctl(struct comedi_device *dev, struct comedi_bufconfig __user *arg) { struct comedi_bufconfig bc; struct comedi_async *async; struct comedi_subdevice *s; int retval = 0; lockdep_assert_held(&dev->mutex); if (copy_from_user(&bc, arg, sizeof(bc))) return -EFAULT; if (bc.subdevice >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[bc.subdevice]; async = s->async; if (!async) { dev_dbg(dev->class_dev, "subdevice does not have async capability\n"); bc.size = 0; bc.maximum_size = 0; goto copyback; } if (bc.maximum_size) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; async->max_bufsize = bc.maximum_size; } if (bc.size) { retval = resize_async_buffer(dev, s, bc.size); if (retval < 0) return retval; } bc.size = async->prealloc_bufsz; bc.maximum_size = async->max_bufsize; copyback: if (copy_to_user(arg, &bc, sizeof(bc))) return -EFAULT; return 0; } /* * COMEDI_DEVINFO ioctl * device info * * arg: * pointer to comedi_devinfo structure * * reads: * nothing * * writes: * comedi_devinfo structure */ static int do_devinfo_ioctl(struct comedi_device *dev, struct comedi_devinfo __user *arg, struct file *file) { struct comedi_subdevice *s; struct comedi_devinfo devinfo; lockdep_assert_held(&dev->mutex); memset(&devinfo, 0, sizeof(devinfo)); /* fill devinfo structure */ devinfo.version_code = COMEDI_VERSION_CODE; devinfo.n_subdevs = dev->n_subdevices; strscpy(devinfo.driver_name, dev->driver->driver_name, COMEDI_NAMELEN); strscpy(devinfo.board_name, dev->board_name, COMEDI_NAMELEN); s = comedi_file_read_subdevice(file); if (s) devinfo.read_subdevice = s->index; else devinfo.read_subdevice = -1; s = comedi_file_write_subdevice(file); if (s) devinfo.write_subdevice = s->index; else devinfo.write_subdevice = -1; if (copy_to_user(arg, &devinfo, sizeof(devinfo))) return -EFAULT; return 0; } /* * COMEDI_SUBDINFO ioctl * subdevices info * * arg: * pointer to array of comedi_subdinfo structures * * reads: * nothing * * writes: * array of comedi_subdinfo structures */ static int do_subdinfo_ioctl(struct comedi_device *dev, struct comedi_subdinfo __user *arg, void *file) { int ret, i; struct comedi_subdinfo *tmp, *us; struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); tmp = kcalloc(dev->n_subdevices, sizeof(*tmp), GFP_KERNEL); if (!tmp) return -ENOMEM; /* fill subdinfo structs */ for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; us = tmp + i; us->type = s->type; us->n_chan = s->n_chan; us->subd_flags = s->subdev_flags; if (comedi_is_subdevice_running(s)) us->subd_flags |= SDF_RUNNING; #define TIMER_nanosec 5 /* backwards compatibility */ us->timer_type = TIMER_nanosec; us->len_chanlist = s->len_chanlist; us->maxdata = s->maxdata; if (s->range_table) { us->range_type = (i << 24) | (0 << 16) | (s->range_table->length); } else { us->range_type = 0; /* XXX */ } if (s->busy) us->subd_flags |= SDF_BUSY; if (s->busy == file) us->subd_flags |= SDF_BUSY_OWNER; if (s->lock) us->subd_flags |= SDF_LOCKED; if (s->lock == file) us->subd_flags |= SDF_LOCK_OWNER; if (!s->maxdata && s->maxdata_list) us->subd_flags |= SDF_MAXDATA; if (s->range_table_list) us->subd_flags |= SDF_RANGETYPE; if (s->do_cmd) us->subd_flags |= SDF_CMD; if (s->insn_bits != &insn_inval) us->insn_bits_support = COMEDI_SUPPORTED; else us->insn_bits_support = 
COMEDI_UNSUPPORTED; } ret = copy_to_user(arg, tmp, dev->n_subdevices * sizeof(*tmp)); kfree(tmp); return ret ? -EFAULT : 0; } /* * COMEDI_CHANINFO ioctl * subdevice channel info * * arg: * pointer to comedi_chaninfo structure * * reads: * comedi_chaninfo structure * * writes: * array of maxdata values to chaninfo->maxdata_list if requested * array of range table lengths to chaninfo->range_table_list if requested */ static int do_chaninfo_ioctl(struct comedi_device *dev, struct comedi_chaninfo *it) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (it->subdev >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[it->subdev]; if (it->maxdata_list) { if (s->maxdata || !s->maxdata_list) return -EINVAL; if (copy_to_user(it->maxdata_list, s->maxdata_list, s->n_chan * sizeof(unsigned int))) return -EFAULT; } if (it->flaglist) return -EINVAL; /* flaglist not supported */ if (it->rangelist) { int i; if (!s->range_table_list) return -EINVAL; for (i = 0; i < s->n_chan; i++) { int x; x = (dev->minor << 28) | (it->subdev << 24) | (i << 16) | (s->range_table_list[i]->length); if (put_user(x, it->rangelist + i)) return -EFAULT; } } return 0; } /* * COMEDI_BUFINFO ioctl * buffer information * * arg: * pointer to comedi_bufinfo structure * * reads: * comedi_bufinfo structure * * writes: * modified comedi_bufinfo structure */ static int do_bufinfo_ioctl(struct comedi_device *dev, struct comedi_bufinfo __user *arg, void *file) { struct comedi_bufinfo bi; struct comedi_subdevice *s; struct comedi_async *async; unsigned int runflags; int retval = 0; bool become_nonbusy = false; lockdep_assert_held(&dev->mutex); if (copy_from_user(&bi, arg, sizeof(bi))) return -EFAULT; if (bi.subdevice >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[bi.subdevice]; async = s->async; if (!async || s->busy != file) return -EINVAL; runflags = comedi_get_subdevice_runflags(s); if (!(async->cmd.flags & CMDF_WRITE)) { /* command was set up in "read" direction */ if (bi.bytes_read) { comedi_buf_read_alloc(s, bi.bytes_read); bi.bytes_read = comedi_buf_read_free(s, bi.bytes_read); } /* * If nothing left to read, and command has stopped, and * {"read" position not updated or command stopped normally}, * then become non-busy. 
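 * If the command stopped on an error but the caller consumed data in
 * this call, reporting -EPIPE is deferred to a later COMEDI_BUFINFO
 * call so that the updated byte counts are still copied back here.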
*/ if (comedi_buf_read_n_available(s) == 0 && !comedi_is_runflags_running(runflags) && (bi.bytes_read == 0 || !comedi_is_runflags_in_error(runflags))) { become_nonbusy = true; if (comedi_is_runflags_in_error(runflags)) retval = -EPIPE; } bi.bytes_written = 0; } else { /* command was set up in "write" direction */ if (!comedi_is_runflags_running(runflags)) { bi.bytes_written = 0; become_nonbusy = true; if (comedi_is_runflags_in_error(runflags)) retval = -EPIPE; } else if (bi.bytes_written) { comedi_buf_write_alloc(s, bi.bytes_written); bi.bytes_written = comedi_buf_write_free(s, bi.bytes_written); } bi.bytes_read = 0; } bi.buf_write_count = async->buf_write_count; bi.buf_write_ptr = async->buf_write_ptr; bi.buf_read_count = async->buf_read_count; bi.buf_read_ptr = async->buf_read_ptr; if (become_nonbusy) do_become_nonbusy(dev, s); if (retval) return retval; if (copy_to_user(arg, &bi, sizeof(bi))) return -EFAULT; return 0; } static int check_insn_config_length(struct comedi_insn *insn, unsigned int *data) { if (insn->n < 1) return -EINVAL; switch (data[0]) { case INSN_CONFIG_DIO_OUTPUT: case INSN_CONFIG_DIO_INPUT: case INSN_CONFIG_DISARM: case INSN_CONFIG_RESET: if (insn->n == 1) return 0; break; case INSN_CONFIG_ARM: case INSN_CONFIG_DIO_QUERY: case INSN_CONFIG_BLOCK_SIZE: case INSN_CONFIG_FILTER: case INSN_CONFIG_SERIAL_CLOCK: case INSN_CONFIG_BIDIRECTIONAL_DATA: case INSN_CONFIG_ALT_SOURCE: case INSN_CONFIG_SET_COUNTER_MODE: case INSN_CONFIG_8254_READ_STATUS: case INSN_CONFIG_SET_ROUTING: case INSN_CONFIG_GET_ROUTING: case INSN_CONFIG_GET_PWM_STATUS: case INSN_CONFIG_PWM_SET_PERIOD: case INSN_CONFIG_PWM_GET_PERIOD: if (insn->n == 2) return 0; break; case INSN_CONFIG_SET_GATE_SRC: case INSN_CONFIG_GET_GATE_SRC: case INSN_CONFIG_SET_CLOCK_SRC: case INSN_CONFIG_GET_CLOCK_SRC: case INSN_CONFIG_SET_OTHER_SRC: case INSN_CONFIG_GET_COUNTER_STATUS: case INSN_CONFIG_GET_PWM_OUTPUT: case INSN_CONFIG_PWM_SET_H_BRIDGE: case INSN_CONFIG_PWM_GET_H_BRIDGE: case INSN_CONFIG_GET_HARDWARE_BUFFER_SIZE: if (insn->n == 3) return 0; break; case INSN_CONFIG_PWM_OUTPUT: case INSN_CONFIG_ANALOG_TRIG: case INSN_CONFIG_TIMER_1: if (insn->n == 5) return 0; break; case INSN_CONFIG_DIGITAL_TRIG: if (insn->n == 6) return 0; break; case INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS: if (insn->n >= 4) return 0; break; /* * by default we allow the insn since we don't have checks for * all possible cases yet */ default: pr_warn("No check for data length of config insn id %i is implemented\n", data[0]); pr_warn("Add a check to %s in %s\n", __func__, __FILE__); pr_warn("Assuming n=%i is correct\n", insn->n); return 0; } return -EINVAL; } static int check_insn_device_config_length(struct comedi_insn *insn, unsigned int *data) { if (insn->n < 1) return -EINVAL; switch (data[0]) { case INSN_DEVICE_CONFIG_TEST_ROUTE: case INSN_DEVICE_CONFIG_CONNECT_ROUTE: case INSN_DEVICE_CONFIG_DISCONNECT_ROUTE: if (insn->n == 3) return 0; break; case INSN_DEVICE_CONFIG_GET_ROUTES: /* * Big enough for config_id and the length of the userland * memory buffer. Additional length should be in factors of 2 * to communicate any returned route pairs (source,destination). */ if (insn->n >= 2) return 0; break; } return -EINVAL; } /** * get_valid_routes() - Calls low-level driver get_valid_routes function to * either return a count of valid routes to user, or copy * of list of all valid device routes to buffer in * userspace. * @dev: comedi device pointer * @data: data from user insn call. The length of the data must be >= 2. 
* data[0] must contain the INSN_DEVICE_CONFIG config_id. * data[1](input) contains the number of _pairs_ for which memory is * allotted from the user. If the user specifies '0', then only * the number of pairs available is returned. * data[1](output) returns either the number of pairs available (if none * where requested) or the number of _pairs_ that are copied back * to the user. * data[2::2] returns each (source, destination) pair. * * Return: -EINVAL if low-level driver does not allocate and return routes as * expected. Returns 0 otherwise. */ static int get_valid_routes(struct comedi_device *dev, unsigned int *data) { lockdep_assert_held(&dev->mutex); data[1] = dev->get_valid_routes(dev, data[1], data + 2); return 0; } static int parse_insn(struct comedi_device *dev, struct comedi_insn *insn, unsigned int *data, void *file) { struct comedi_subdevice *s; int ret = 0; int i; lockdep_assert_held(&dev->mutex); if (insn->insn & INSN_MASK_SPECIAL) { /* a non-subdevice instruction */ switch (insn->insn) { case INSN_GTOD: { struct timespec64 tv; if (insn->n != 2) { ret = -EINVAL; break; } ktime_get_real_ts64(&tv); /* unsigned data safe until 2106 */ data[0] = (unsigned int)tv.tv_sec; data[1] = tv.tv_nsec / NSEC_PER_USEC; ret = 2; break; } case INSN_WAIT: if (insn->n != 1 || data[0] >= 100000) { ret = -EINVAL; break; } udelay(data[0] / 1000); ret = 1; break; case INSN_INTTRIG: if (insn->n != 1) { ret = -EINVAL; break; } if (insn->subdev >= dev->n_subdevices) { dev_dbg(dev->class_dev, "%d not usable subdevice\n", insn->subdev); ret = -EINVAL; break; } s = &dev->subdevices[insn->subdev]; if (!s->async) { dev_dbg(dev->class_dev, "no async\n"); ret = -EINVAL; break; } if (!s->async->inttrig) { dev_dbg(dev->class_dev, "no inttrig\n"); ret = -EAGAIN; break; } ret = s->async->inttrig(dev, s, data[0]); if (ret >= 0) ret = 1; break; case INSN_DEVICE_CONFIG: ret = check_insn_device_config_length(insn, data); if (ret) break; if (data[0] == INSN_DEVICE_CONFIG_GET_ROUTES) { /* * data[1] should be the number of _pairs_ that * the memory can hold. */ data[1] = (insn->n - 2) / 2; ret = get_valid_routes(dev, data); break; } /* other global device config instructions. */ ret = dev->insn_device_config(dev, insn, data); break; default: dev_dbg(dev->class_dev, "invalid insn\n"); ret = -EINVAL; break; } } else { /* a subdevice instruction */ unsigned int maxdata; if (insn->subdev >= dev->n_subdevices) { dev_dbg(dev->class_dev, "subdevice %d out of range\n", insn->subdev); ret = -EINVAL; goto out; } s = &dev->subdevices[insn->subdev]; if (s->type == COMEDI_SUBD_UNUSED) { dev_dbg(dev->class_dev, "%d not usable subdevice\n", insn->subdev); ret = -EIO; goto out; } /* are we locked? (ioctl lock) */ if (s->lock && s->lock != file) { dev_dbg(dev->class_dev, "device locked\n"); ret = -EACCES; goto out; } ret = comedi_check_chanlist(s, 1, &insn->chanspec); if (ret < 0) { ret = -EINVAL; dev_dbg(dev->class_dev, "bad chanspec\n"); goto out; } if (s->busy) { ret = -EBUSY; goto out; } /* This looks arbitrary. It is. */ s->busy = parse_insn; switch (insn->insn) { case INSN_READ: ret = s->insn_read(dev, s, insn, data); if (ret == -ETIMEDOUT) { dev_dbg(dev->class_dev, "subdevice %d read instruction timed out\n", s->index); } break; case INSN_WRITE: maxdata = s->maxdata_list ? 
s->maxdata_list[CR_CHAN(insn->chanspec)] : s->maxdata; for (i = 0; i < insn->n; ++i) { if (data[i] > maxdata) { ret = -EINVAL; dev_dbg(dev->class_dev, "bad data value(s)\n"); break; } } if (ret == 0) { ret = s->insn_write(dev, s, insn, data); if (ret == -ETIMEDOUT) { dev_dbg(dev->class_dev, "subdevice %d write instruction timed out\n", s->index); } } break; case INSN_BITS: if (insn->n != 2) { ret = -EINVAL; } else { /* * Most drivers ignore the base channel in * insn->chanspec. Fix this here if * the subdevice has <= 32 channels. */ unsigned int orig_mask = data[0]; unsigned int shift = 0; if (s->n_chan <= 32) { shift = CR_CHAN(insn->chanspec); if (shift > 0) { insn->chanspec = 0; data[0] <<= shift; data[1] <<= shift; } } ret = s->insn_bits(dev, s, insn, data); data[0] = orig_mask; if (shift > 0) data[1] >>= shift; } break; case INSN_CONFIG: ret = check_insn_config_length(insn, data); if (ret) break; ret = s->insn_config(dev, s, insn, data); break; default: ret = -EINVAL; break; } s->busy = NULL; } out: return ret; } /* * COMEDI_INSNLIST ioctl * synchronous instruction list * * arg: * pointer to comedi_insnlist structure * * reads: * comedi_insnlist structure * array of comedi_insn structures from insnlist->insns pointer * data (for writes) from insns[].data pointers * * writes: * data (for reads) to insns[].data pointers */ /* arbitrary limits */ #define MIN_SAMPLES 16 #define MAX_SAMPLES 65536 static int do_insnlist_ioctl(struct comedi_device *dev, struct comedi_insn *insns, unsigned int n_insns, void *file) { unsigned int *data = NULL; unsigned int max_n_data_required = MIN_SAMPLES; int i = 0; int ret = 0; lockdep_assert_held(&dev->mutex); /* Determine maximum memory needed for all instructions. */ for (i = 0; i < n_insns; ++i) { if (insns[i].n > MAX_SAMPLES) { dev_dbg(dev->class_dev, "number of samples too large\n"); ret = -EINVAL; goto error; } max_n_data_required = max(max_n_data_required, insns[i].n); } /* Allocate scratch space for all instruction data. */ data = kmalloc_array(max_n_data_required, sizeof(unsigned int), GFP_KERNEL); if (!data) { ret = -ENOMEM; goto error; } for (i = 0; i < n_insns; ++i) { if (insns[i].insn & INSN_MASK_WRITE) { if (copy_from_user(data, insns[i].data, insns[i].n * sizeof(unsigned int))) { dev_dbg(dev->class_dev, "copy_from_user failed\n"); ret = -EFAULT; goto error; } } ret = parse_insn(dev, insns + i, data, file); if (ret < 0) goto error; if (insns[i].insn & INSN_MASK_READ) { if (copy_to_user(insns[i].data, data, insns[i].n * sizeof(unsigned int))) { dev_dbg(dev->class_dev, "copy_to_user failed\n"); ret = -EFAULT; goto error; } } if (need_resched()) schedule(); } error: kfree(data); if (ret < 0) return ret; return i; } /* * COMEDI_INSN ioctl * synchronous instruction * * arg: * pointer to comedi_insn structure * * reads: * comedi_insn structure * data (for writes) from insn->data pointer * * writes: * data (for reads) to insn->data pointer */ static int do_insn_ioctl(struct comedi_device *dev, struct comedi_insn *insn, void *file) { unsigned int *data = NULL; unsigned int n_data = MIN_SAMPLES; int ret = 0; lockdep_assert_held(&dev->mutex); n_data = max(n_data, insn->n); /* This is where the behavior of insn and insnlist deviate. 
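 * A single COMEDI_INSN silently clamps an oversized insn->n to
 * MAX_SAMPLES, whereas do_insnlist_ioctl() rejects such an instruction
 * with -EINVAL.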
*/ if (insn->n > MAX_SAMPLES) { insn->n = MAX_SAMPLES; n_data = MAX_SAMPLES; } data = kmalloc_array(n_data, sizeof(unsigned int), GFP_KERNEL); if (!data) { ret = -ENOMEM; goto error; } if (insn->insn & INSN_MASK_WRITE) { if (copy_from_user(data, insn->data, insn->n * sizeof(unsigned int))) { ret = -EFAULT; goto error; } } ret = parse_insn(dev, insn, data, file); if (ret < 0) goto error; if (insn->insn & INSN_MASK_READ) { if (copy_to_user(insn->data, data, insn->n * sizeof(unsigned int))) { ret = -EFAULT; goto error; } } ret = insn->n; error: kfree(data); return ret; } static int __comedi_get_user_cmd(struct comedi_device *dev, struct comedi_cmd *cmd) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (cmd->subdev >= dev->n_subdevices) { dev_dbg(dev->class_dev, "%d no such subdevice\n", cmd->subdev); return -ENODEV; } s = &dev->subdevices[cmd->subdev]; if (s->type == COMEDI_SUBD_UNUSED) { dev_dbg(dev->class_dev, "%d not valid subdevice\n", cmd->subdev); return -EIO; } if (!s->do_cmd || !s->do_cmdtest || !s->async) { dev_dbg(dev->class_dev, "subdevice %d does not support commands\n", cmd->subdev); return -EIO; } /* make sure channel/gain list isn't too long */ if (cmd->chanlist_len > s->len_chanlist) { dev_dbg(dev->class_dev, "channel/gain list too long %d > %d\n", cmd->chanlist_len, s->len_chanlist); return -EINVAL; } /* * Set the CMDF_WRITE flag to the correct state if the subdevice * supports only "read" commands or only "write" commands. */ switch (s->subdev_flags & (SDF_CMD_READ | SDF_CMD_WRITE)) { case SDF_CMD_READ: cmd->flags &= ~CMDF_WRITE; break; case SDF_CMD_WRITE: cmd->flags |= CMDF_WRITE; break; default: break; } return 0; } static int __comedi_get_user_chanlist(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int __user *user_chanlist, struct comedi_cmd *cmd) { unsigned int *chanlist; int ret; lockdep_assert_held(&dev->mutex); cmd->chanlist = NULL; chanlist = memdup_array_user(user_chanlist, cmd->chanlist_len, sizeof(unsigned int)); if (IS_ERR(chanlist)) return PTR_ERR(chanlist); /* make sure each element in channel/gain list is valid */ ret = comedi_check_chanlist(s, cmd->chanlist_len, chanlist); if (ret < 0) { kfree(chanlist); return ret; } cmd->chanlist = chanlist; return 0; } /* * COMEDI_CMD ioctl * asynchronous acquisition command set-up * * arg: * pointer to comedi_cmd structure * * reads: * comedi_cmd structure * channel/range list from cmd->chanlist pointer * * writes: * possibly modified comedi_cmd structure (when -EAGAIN returned) */ static int do_cmd_ioctl(struct comedi_device *dev, struct comedi_cmd *cmd, bool *copy, void *file) { struct comedi_subdevice *s; struct comedi_async *async; unsigned int __user *user_chanlist; int ret; lockdep_assert_held(&dev->mutex); /* do some simple cmd validation */ ret = __comedi_get_user_cmd(dev, cmd); if (ret) return ret; /* save user's chanlist pointer so it can be restored later */ user_chanlist = (unsigned int __user *)cmd->chanlist; s = &dev->subdevices[cmd->subdev]; async = s->async; /* are we locked? (ioctl lock) */ if (s->lock && s->lock != file) { dev_dbg(dev->class_dev, "subdevice locked\n"); return -EACCES; } /* are we busy? 
*/ if (s->busy) { dev_dbg(dev->class_dev, "subdevice busy\n"); return -EBUSY; } /* make sure channel/gain list isn't too short */ if (cmd->chanlist_len < 1) { dev_dbg(dev->class_dev, "channel/gain list too short %u < 1\n", cmd->chanlist_len); return -EINVAL; } async->cmd = *cmd; async->cmd.data = NULL; /* load channel/gain list */ ret = __comedi_get_user_chanlist(dev, s, user_chanlist, &async->cmd); if (ret) goto cleanup; ret = s->do_cmdtest(dev, s, &async->cmd); if (async->cmd.flags & CMDF_BOGUS || ret) { dev_dbg(dev->class_dev, "test returned %d\n", ret); *cmd = async->cmd; /* restore chanlist pointer before copying back */ cmd->chanlist = (unsigned int __force *)user_chanlist; cmd->data = NULL; *copy = true; ret = -EAGAIN; goto cleanup; } if (!async->prealloc_bufsz) { ret = -ENOMEM; dev_dbg(dev->class_dev, "no buffer (?)\n"); goto cleanup; } comedi_buf_reset(s); async->cb_mask = COMEDI_CB_BLOCK | COMEDI_CB_CANCEL_MASK; if (async->cmd.flags & CMDF_WAKE_EOS) async->cb_mask |= COMEDI_CB_EOS; comedi_update_subdevice_runflags(s, COMEDI_SRF_BUSY_MASK, COMEDI_SRF_RUNNING); /* * Set s->busy _after_ setting COMEDI_SRF_RUNNING flag to avoid * race with comedi_read() or comedi_write(). */ s->busy = file; ret = s->do_cmd(dev, s); if (ret == 0) return 0; cleanup: do_become_nonbusy(dev, s); return ret; } /* * COMEDI_CMDTEST ioctl * asynchronous acquisition command testing * * arg: * pointer to comedi_cmd structure * * reads: * comedi_cmd structure * channel/range list from cmd->chanlist pointer * * writes: * possibly modified comedi_cmd structure */ static int do_cmdtest_ioctl(struct comedi_device *dev, struct comedi_cmd *cmd, bool *copy, void *file) { struct comedi_subdevice *s; unsigned int __user *user_chanlist; int ret; lockdep_assert_held(&dev->mutex); /* do some simple cmd validation */ ret = __comedi_get_user_cmd(dev, cmd); if (ret) return ret; /* save user's chanlist pointer so it can be restored later */ user_chanlist = (unsigned int __user *)cmd->chanlist; s = &dev->subdevices[cmd->subdev]; /* user_chanlist can be NULL for COMEDI_CMDTEST ioctl */ if (user_chanlist) { /* load channel/gain list */ ret = __comedi_get_user_chanlist(dev, s, user_chanlist, cmd); if (ret) return ret; } ret = s->do_cmdtest(dev, s, cmd); kfree(cmd->chanlist); /* free kernel copy of user chanlist */ /* restore chanlist pointer before copying back */ cmd->chanlist = (unsigned int __force *)user_chanlist; *copy = true; return ret; } /* * COMEDI_LOCK ioctl * lock subdevice * * arg: * subdevice number * * reads: * nothing * * writes: * nothing */ static int do_lock_ioctl(struct comedi_device *dev, unsigned long arg, void *file) { int ret = 0; unsigned long flags; struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (arg >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[arg]; spin_lock_irqsave(&s->spin_lock, flags); if (s->busy || s->lock) ret = -EBUSY; else s->lock = file; spin_unlock_irqrestore(&s->spin_lock, flags); return ret; } /* * COMEDI_UNLOCK ioctl * unlock subdevice * * arg: * subdevice number * * reads: * nothing * * writes: * nothing */ static int do_unlock_ioctl(struct comedi_device *dev, unsigned long arg, void *file) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (arg >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[arg]; if (s->busy) return -EBUSY; if (s->lock && s->lock != file) return -EACCES; if (s->lock == file) s->lock = NULL; return 0; } /* * COMEDI_CANCEL ioctl * cancel asynchronous acquisition * * arg: * subdevice number * * reads: * nothing * * 
writes: * nothing */ static int do_cancel_ioctl(struct comedi_device *dev, unsigned long arg, void *file) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (arg >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[arg]; if (!s->async) return -EINVAL; if (!s->busy) return 0; if (s->busy != file) return -EBUSY; return do_cancel(dev, s); } /* * COMEDI_POLL ioctl * instructs driver to synchronize buffers * * arg: * subdevice number * * reads: * nothing * * writes: * nothing */ static int do_poll_ioctl(struct comedi_device *dev, unsigned long arg, void *file) { struct comedi_subdevice *s; lockdep_assert_held(&dev->mutex); if (arg >= dev->n_subdevices) return -EINVAL; s = &dev->subdevices[arg]; if (!s->busy) return 0; if (s->busy != file) return -EBUSY; if (s->poll) return s->poll(dev, s); return -EINVAL; } /* * COMEDI_SETRSUBD ioctl * sets the current "read" subdevice on a per-file basis * * arg: * subdevice number * * reads: * nothing * * writes: * nothing */ static int do_setrsubd_ioctl(struct comedi_device *dev, unsigned long arg, struct file *file) { struct comedi_file *cfp = file->private_data; struct comedi_subdevice *s_old, *s_new; lockdep_assert_held(&dev->mutex); if (arg >= dev->n_subdevices) return -EINVAL; s_new = &dev->subdevices[arg]; s_old = comedi_file_read_subdevice(file); if (s_old == s_new) return 0; /* no change */ if (!(s_new->subdev_flags & SDF_CMD_READ)) return -EINVAL; /* * Check the file isn't still busy handling a "read" command on the * old subdevice (if any). */ if (s_old && s_old->busy == file && s_old->async && !(s_old->async->cmd.flags & CMDF_WRITE)) return -EBUSY; WRITE_ONCE(cfp->read_subdev, s_new); return 0; } /* * COMEDI_SETWSUBD ioctl * sets the current "write" subdevice on a per-file basis * * arg: * subdevice number * * reads: * nothing * * writes: * nothing */ static int do_setwsubd_ioctl(struct comedi_device *dev, unsigned long arg, struct file *file) { struct comedi_file *cfp = file->private_data; struct comedi_subdevice *s_old, *s_new; lockdep_assert_held(&dev->mutex); if (arg >= dev->n_subdevices) return -EINVAL; s_new = &dev->subdevices[arg]; s_old = comedi_file_write_subdevice(file); if (s_old == s_new) return 0; /* no change */ if (!(s_new->subdev_flags & SDF_CMD_WRITE)) return -EINVAL; /* * Check the file isn't still busy handling a "write" command on the * old subdevice (if any). */ if (s_old && s_old->busy == file && s_old->async && (s_old->async->cmd.flags & CMDF_WRITE)) return -EBUSY; WRITE_ONCE(cfp->write_subdev, s_new); return 0; } static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { unsigned int minor = iminor(file_inode(file)); struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; int rc; mutex_lock(&dev->mutex); /* * Device config is special, because it must work on * an unconfigured device. */ if (cmd == COMEDI_DEVCONFIG) { if (minor >= COMEDI_NUM_BOARD_MINORS) { /* Device config not appropriate on non-board minors. */ rc = -ENOTTY; goto done; } rc = do_devconfig_ioctl(dev, (struct comedi_devconfig __user *)arg); if (rc == 0) { if (arg == 0 && dev->minor >= comedi_num_legacy_minors) { /* * Successfully unconfigured a dynamically * allocated device. Try and remove it. 
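 * comedi_clear_board_dev() succeeds only if the device is still
 * registered in the board minor table, so it cannot be freed twice;
 * dev->mutex is dropped before comedi_free_board_dev() is called.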
*/ if (comedi_clear_board_dev(dev)) { mutex_unlock(&dev->mutex); comedi_free_board_dev(dev); return rc; } } } goto done; } if (!dev->attached) { dev_dbg(dev->class_dev, "no driver attached\n"); rc = -ENODEV; goto done; } switch (cmd) { case COMEDI_BUFCONFIG: rc = do_bufconfig_ioctl(dev, (struct comedi_bufconfig __user *)arg); break; case COMEDI_DEVINFO: rc = do_devinfo_ioctl(dev, (struct comedi_devinfo __user *)arg, file); break; case COMEDI_SUBDINFO: rc = do_subdinfo_ioctl(dev, (struct comedi_subdinfo __user *)arg, file); break; case COMEDI_CHANINFO: { struct comedi_chaninfo it; if (copy_from_user(&it, (void __user *)arg, sizeof(it))) rc = -EFAULT; else rc = do_chaninfo_ioctl(dev, &it); break; } case COMEDI_RANGEINFO: { struct comedi_rangeinfo it; if (copy_from_user(&it, (void __user *)arg, sizeof(it))) rc = -EFAULT; else rc = do_rangeinfo_ioctl(dev, &it); break; } case COMEDI_BUFINFO: rc = do_bufinfo_ioctl(dev, (struct comedi_bufinfo __user *)arg, file); break; case COMEDI_LOCK: rc = do_lock_ioctl(dev, arg, file); break; case COMEDI_UNLOCK: rc = do_unlock_ioctl(dev, arg, file); break; case COMEDI_CANCEL: rc = do_cancel_ioctl(dev, arg, file); break; case COMEDI_CMD: { struct comedi_cmd cmd; bool copy = false; if (copy_from_user(&cmd, (void __user *)arg, sizeof(cmd))) { rc = -EFAULT; break; } rc = do_cmd_ioctl(dev, &cmd, &copy, file); if (copy && copy_to_user((void __user *)arg, &cmd, sizeof(cmd))) rc = -EFAULT; break; } case COMEDI_CMDTEST: { struct comedi_cmd cmd; bool copy = false; if (copy_from_user(&cmd, (void __user *)arg, sizeof(cmd))) { rc = -EFAULT; break; } rc = do_cmdtest_ioctl(dev, &cmd, &copy, file); if (copy && copy_to_user((void __user *)arg, &cmd, sizeof(cmd))) rc = -EFAULT; break; } case COMEDI_INSNLIST: { struct comedi_insnlist insnlist; struct comedi_insn *insns = NULL; if (copy_from_user(&insnlist, (void __user *)arg, sizeof(insnlist))) { rc = -EFAULT; break; } insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL); if (!insns) { rc = -ENOMEM; break; } if (copy_from_user(insns, insnlist.insns, sizeof(*insns) * insnlist.n_insns)) { rc = -EFAULT; kfree(insns); break; } rc = do_insnlist_ioctl(dev, insns, insnlist.n_insns, file); kfree(insns); break; } case COMEDI_INSN: { struct comedi_insn insn; if (copy_from_user(&insn, (void __user *)arg, sizeof(insn))) rc = -EFAULT; else rc = do_insn_ioctl(dev, &insn, file); break; } case COMEDI_POLL: rc = do_poll_ioctl(dev, arg, file); break; case COMEDI_SETRSUBD: rc = do_setrsubd_ioctl(dev, arg, file); break; case COMEDI_SETWSUBD: rc = do_setwsubd_ioctl(dev, arg, file); break; default: rc = -ENOTTY; break; } done: mutex_unlock(&dev->mutex); return rc; } static void comedi_vm_open(struct vm_area_struct *area) { struct comedi_buf_map *bm; bm = area->vm_private_data; comedi_buf_map_get(bm); } static void comedi_vm_close(struct vm_area_struct *area) { struct comedi_buf_map *bm; bm = area->vm_private_data; comedi_buf_map_put(bm); } static int comedi_vm_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { struct comedi_buf_map *bm = vma->vm_private_data; unsigned long offset = addr - vma->vm_start + (vma->vm_pgoff << PAGE_SHIFT); if (len < 0) return -EINVAL; if (len > vma->vm_end - addr) len = vma->vm_end - addr; return comedi_buf_map_access(bm, offset, buf, len, write); } static const struct vm_operations_struct comedi_vm_ops = { .open = comedi_vm_open, .close = comedi_vm_close, .access = comedi_vm_access, }; static int comedi_mmap(struct file *file, struct vm_area_struct *vma) { struct comedi_file *cfp =
file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_subdevice *s; struct comedi_async *async; struct comedi_buf_map *bm = NULL; struct comedi_buf_page *buf; unsigned long start = vma->vm_start; unsigned long size; int n_pages; int i; int retval = 0; /* * 'trylock' avoids circular dependency with current->mm->mmap_lock * and down-reading &dev->attach_lock should normally succeed without * contention unless the device is in the process of being attached * or detached. */ if (!down_read_trylock(&dev->attach_lock)) return -EAGAIN; if (!dev->attached) { dev_dbg(dev->class_dev, "no driver attached\n"); retval = -ENODEV; goto done; } if (vma->vm_flags & VM_WRITE) s = comedi_file_write_subdevice(file); else s = comedi_file_read_subdevice(file); if (!s) { retval = -EINVAL; goto done; } async = s->async; if (!async) { retval = -EINVAL; goto done; } if (vma->vm_pgoff != 0) { dev_dbg(dev->class_dev, "mmap() offset must be 0.\n"); retval = -EINVAL; goto done; } size = vma->vm_end - vma->vm_start; if (size > async->prealloc_bufsz) { retval = -EFAULT; goto done; } if (offset_in_page(size)) { retval = -EFAULT; goto done; } n_pages = vma_pages(vma); /* get reference to current buf map (if any) */ bm = comedi_buf_map_from_subdev_get(s); if (!bm || n_pages > bm->n_pages) { retval = -EINVAL; goto done; } if (bm->dma_dir != DMA_NONE) { /* * DMA buffer was allocated as a single block. * Address is in page_list[0]. */ buf = &bm->page_list[0]; retval = dma_mmap_coherent(bm->dma_hw_dev, vma, buf->virt_addr, buf->dma_addr, n_pages * PAGE_SIZE); } else { for (i = 0; i < n_pages; ++i) { unsigned long pfn; buf = &bm->page_list[i]; pfn = page_to_pfn(virt_to_page(buf->virt_addr)); retval = remap_pfn_range(vma, start, pfn, PAGE_SIZE, PAGE_SHARED); if (retval) break; start += PAGE_SIZE; } } if (retval == 0) { vma->vm_ops = &comedi_vm_ops; vma->vm_private_data = bm; vma->vm_ops->open(vma); } done: up_read(&dev->attach_lock); comedi_buf_map_put(bm); /* put reference to buf map - okay if NULL */ return retval; } static __poll_t comedi_poll(struct file *file, poll_table *wait) { __poll_t mask = 0; struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_subdevice *s, *s_read; down_read(&dev->attach_lock); if (!dev->attached) { dev_dbg(dev->class_dev, "no driver attached\n"); goto done; } s = comedi_file_read_subdevice(file); s_read = s; if (s && s->async) { poll_wait(file, &s->async->wait_head, wait); if (s->busy != file || !comedi_is_subdevice_running(s) || (s->async->cmd.flags & CMDF_WRITE) || comedi_buf_read_n_available(s) > 0) mask |= EPOLLIN | EPOLLRDNORM; } s = comedi_file_write_subdevice(file); if (s && s->async) { unsigned int bps = comedi_bytes_per_sample(s); if (s != s_read) poll_wait(file, &s->async->wait_head, wait); if (s->busy != file || !comedi_is_subdevice_running(s) || !(s->async->cmd.flags & CMDF_WRITE) || comedi_buf_write_n_available(s) >= bps) mask |= EPOLLOUT | EPOLLWRNORM; } done: up_read(&dev->attach_lock); return mask; } static ssize_t comedi_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *offset) { struct comedi_subdevice *s; struct comedi_async *async; unsigned int n, m; ssize_t count = 0; int retval = 0; DECLARE_WAITQUEUE(wait, current); struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; bool become_nonbusy = false; bool attach_locked; unsigned int old_detach_count; /* Protect against device detachment during operation. 
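 * Holding dev->attach_lock for reading keeps comedi_device_detach()
 * (which takes it for writing) from tearing down s->async and the
 * data buffer while user data is being copied into it.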
*/ down_read(&dev->attach_lock); attach_locked = true; old_detach_count = dev->detach_count; if (!dev->attached) { dev_dbg(dev->class_dev, "no driver attached\n"); retval = -ENODEV; goto out; } s = comedi_file_write_subdevice(file); if (!s || !s->async) { retval = -EIO; goto out; } async = s->async; if (s->busy != file || !(async->cmd.flags & CMDF_WRITE)) { retval = -EINVAL; goto out; } add_wait_queue(&async->wait_head, &wait); while (count == 0 && !retval) { unsigned int runflags; unsigned int wp, n1, n2; set_current_state(TASK_INTERRUPTIBLE); runflags = comedi_get_subdevice_runflags(s); if (!comedi_is_runflags_running(runflags)) { if (comedi_is_runflags_in_error(runflags)) retval = -EPIPE; if (retval || nbytes) become_nonbusy = true; break; } if (nbytes == 0) break; /* Allocate all free buffer space. */ comedi_buf_write_alloc(s, async->prealloc_bufsz); m = comedi_buf_write_n_allocated(s); n = min_t(size_t, m, nbytes); if (n == 0) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; break; } schedule(); if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (s->busy != file || !(async->cmd.flags & CMDF_WRITE)) { retval = -EINVAL; break; } continue; } set_current_state(TASK_RUNNING); wp = async->buf_write_ptr; n1 = min(n, async->prealloc_bufsz - wp); n2 = n - n1; m = copy_from_user(async->prealloc_buf + wp, buf, n1); if (m) m += n2; else if (n2) m = copy_from_user(async->prealloc_buf, buf + n1, n2); if (m) { n -= m; retval = -EFAULT; } comedi_buf_write_free(s, n); count += n; nbytes -= n; buf += n; } remove_wait_queue(&async->wait_head, &wait); set_current_state(TASK_RUNNING); if (become_nonbusy && count == 0) { struct comedi_subdevice *new_s; /* * To avoid deadlock, cannot acquire dev->mutex * while dev->attach_lock is held. */ up_read(&dev->attach_lock); attach_locked = false; mutex_lock(&dev->mutex); /* * Check device hasn't become detached behind our back. * Checking dev->detach_count is unchanged ought to be * sufficient (unless there have been 2**32 detaches in the * meantime!), but check the subdevice pointer as well just in * case. * * Also check the subdevice is still in a suitable state to * become non-busy in case it changed behind our back. */ new_s = comedi_file_write_subdevice(file); if (dev->attached && old_detach_count == dev->detach_count && s == new_s && new_s->async == async && s->busy == file && (async->cmd.flags & CMDF_WRITE) && !comedi_is_subdevice_running(s)) do_become_nonbusy(dev, s); mutex_unlock(&dev->mutex); } out: if (attach_locked) up_read(&dev->attach_lock); return count ? count : retval; } static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, loff_t *offset) { struct comedi_subdevice *s; struct comedi_async *async; unsigned int n, m; ssize_t count = 0; int retval = 0; DECLARE_WAITQUEUE(wait, current); struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; unsigned int old_detach_count; bool become_nonbusy = false; bool attach_locked; /* Protect against device detachment during operation. 
*/ down_read(&dev->attach_lock); attach_locked = true; old_detach_count = dev->detach_count; if (!dev->attached) { dev_dbg(dev->class_dev, "no driver attached\n"); retval = -ENODEV; goto out; } s = comedi_file_read_subdevice(file); if (!s || !s->async) { retval = -EIO; goto out; } async = s->async; if (s->busy != file || (async->cmd.flags & CMDF_WRITE)) { retval = -EINVAL; goto out; } add_wait_queue(&async->wait_head, &wait); while (count == 0 && !retval) { unsigned int rp, n1, n2; set_current_state(TASK_INTERRUPTIBLE); m = comedi_buf_read_n_available(s); n = min_t(size_t, m, nbytes); if (n == 0) { unsigned int runflags = comedi_get_subdevice_runflags(s); if (!comedi_is_runflags_running(runflags)) { if (comedi_is_runflags_in_error(runflags)) retval = -EPIPE; if (retval || nbytes) become_nonbusy = true; break; } if (nbytes == 0) break; if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; break; } schedule(); if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (s->busy != file || (async->cmd.flags & CMDF_WRITE)) { retval = -EINVAL; break; } continue; } set_current_state(TASK_RUNNING); rp = async->buf_read_ptr; n1 = min(n, async->prealloc_bufsz - rp); n2 = n - n1; m = copy_to_user(buf, async->prealloc_buf + rp, n1); if (m) m += n2; else if (n2) m = copy_to_user(buf + n1, async->prealloc_buf, n2); if (m) { n -= m; retval = -EFAULT; } comedi_buf_read_alloc(s, n); comedi_buf_read_free(s, n); count += n; nbytes -= n; buf += n; } remove_wait_queue(&async->wait_head, &wait); set_current_state(TASK_RUNNING); if (become_nonbusy && count == 0) { struct comedi_subdevice *new_s; /* * To avoid deadlock, cannot acquire dev->mutex * while dev->attach_lock is held. */ up_read(&dev->attach_lock); attach_locked = false; mutex_lock(&dev->mutex); /* * Check device hasn't become detached behind our back. * Checking dev->detach_count is unchanged ought to be * sufficient (unless there have been 2**32 detaches in the * meantime!), but check the subdevice pointer as well just in * case. * * Also check the subdevice is still in a suitable state to * become non-busy in case it changed behind our back. */ new_s = comedi_file_read_subdevice(file); if (dev->attached && old_detach_count == dev->detach_count && s == new_s && new_s->async == async && s->busy == file && !(async->cmd.flags & CMDF_WRITE) && !comedi_is_subdevice_running(s) && comedi_buf_read_n_available(s) == 0) do_become_nonbusy(dev, s); mutex_unlock(&dev->mutex); } out: if (attach_locked) up_read(&dev->attach_lock); return count ? 
count : retval; } static int comedi_open(struct inode *inode, struct file *file) { const unsigned int minor = iminor(inode); struct comedi_file *cfp; struct comedi_device *dev = comedi_dev_get_from_minor(minor); int rc; if (!dev) { pr_debug("invalid minor number\n"); return -ENODEV; } cfp = kzalloc(sizeof(*cfp), GFP_KERNEL); if (!cfp) { comedi_dev_put(dev); return -ENOMEM; } cfp->dev = dev; mutex_lock(&dev->mutex); if (!dev->attached && !capable(CAP_SYS_ADMIN)) { dev_dbg(dev->class_dev, "not attached and not CAP_SYS_ADMIN\n"); rc = -ENODEV; goto out; } if (dev->attached && dev->use_count == 0) { if (!try_module_get(dev->driver->module)) { rc = -ENXIO; goto out; } if (dev->open) { rc = dev->open(dev); if (rc < 0) { module_put(dev->driver->module); goto out; } } } dev->use_count++; file->private_data = cfp; comedi_file_reset(file); rc = 0; out: mutex_unlock(&dev->mutex); if (rc) { comedi_dev_put(dev); kfree(cfp); } return rc; } static int comedi_fasync(int fd, struct file *file, int on) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; return fasync_helper(fd, file, on, &dev->async_queue); } static int comedi_close(struct inode *inode, struct file *file) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_subdevice *s = NULL; int i; mutex_lock(&dev->mutex); if (dev->subdevices) { for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; if (s->busy == file) do_cancel(dev, s); if (s->lock == file) s->lock = NULL; } } if (dev->attached && dev->use_count == 1) { if (dev->close) dev->close(dev); module_put(dev->driver->module); } dev->use_count--; mutex_unlock(&dev->mutex); comedi_dev_put(dev); kfree(cfp); return 0; } #ifdef CONFIG_COMPAT #define COMEDI32_CHANINFO _IOR(CIO, 3, struct comedi32_chaninfo_struct) #define COMEDI32_RANGEINFO _IOR(CIO, 8, struct comedi32_rangeinfo_struct) /* * N.B. COMEDI32_CMD and COMEDI_CMD ought to use _IOWR, not _IOR. * It's too late to change it now, but it only affects the command number. */ #define COMEDI32_CMD _IOR(CIO, 9, struct comedi32_cmd_struct) /* * N.B. COMEDI32_CMDTEST and COMEDI_CMDTEST ought to use _IOWR, not _IOR. * It's too late to change it now, but it only affects the command number. 
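 * (The direction bits are part of the ioctl number itself, so switching
 * to _IOWR would change the number and break the existing 32-bit ABI.)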
*/ #define COMEDI32_CMDTEST _IOR(CIO, 10, struct comedi32_cmd_struct) #define COMEDI32_INSNLIST _IOR(CIO, 11, struct comedi32_insnlist_struct) #define COMEDI32_INSN _IOR(CIO, 12, struct comedi32_insn_struct) struct comedi32_chaninfo_struct { unsigned int subdev; compat_uptr_t maxdata_list; /* 32-bit 'unsigned int *' */ compat_uptr_t flaglist; /* 32-bit 'unsigned int *' */ compat_uptr_t rangelist; /* 32-bit 'unsigned int *' */ unsigned int unused[4]; }; struct comedi32_rangeinfo_struct { unsigned int range_type; compat_uptr_t range_ptr; /* 32-bit 'void *' */ }; struct comedi32_cmd_struct { unsigned int subdev; unsigned int flags; unsigned int start_src; unsigned int start_arg; unsigned int scan_begin_src; unsigned int scan_begin_arg; unsigned int convert_src; unsigned int convert_arg; unsigned int scan_end_src; unsigned int scan_end_arg; unsigned int stop_src; unsigned int stop_arg; compat_uptr_t chanlist; /* 32-bit 'unsigned int *' */ unsigned int chanlist_len; compat_uptr_t data; /* 32-bit 'short *' */ unsigned int data_len; }; struct comedi32_insn_struct { unsigned int insn; unsigned int n; compat_uptr_t data; /* 32-bit 'unsigned int *' */ unsigned int subdev; unsigned int chanspec; unsigned int unused[3]; }; struct comedi32_insnlist_struct { unsigned int n_insns; compat_uptr_t insns; /* 32-bit 'struct comedi_insn *' */ }; /* Handle 32-bit COMEDI_CHANINFO ioctl. */ static int compat_chaninfo(struct file *file, unsigned long arg) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi32_chaninfo_struct chaninfo32; struct comedi_chaninfo chaninfo; int err; if (copy_from_user(&chaninfo32, compat_ptr(arg), sizeof(chaninfo32))) return -EFAULT; memset(&chaninfo, 0, sizeof(chaninfo)); chaninfo.subdev = chaninfo32.subdev; chaninfo.maxdata_list = compat_ptr(chaninfo32.maxdata_list); chaninfo.flaglist = compat_ptr(chaninfo32.flaglist); chaninfo.rangelist = compat_ptr(chaninfo32.rangelist); mutex_lock(&dev->mutex); err = do_chaninfo_ioctl(dev, &chaninfo); mutex_unlock(&dev->mutex); return err; } /* Handle 32-bit COMEDI_RANGEINFO ioctl. */ static int compat_rangeinfo(struct file *file, unsigned long arg) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi32_rangeinfo_struct rangeinfo32; struct comedi_rangeinfo rangeinfo; int err; if (copy_from_user(&rangeinfo32, compat_ptr(arg), sizeof(rangeinfo32))) return -EFAULT; memset(&rangeinfo, 0, sizeof(rangeinfo)); rangeinfo.range_type = rangeinfo32.range_type; rangeinfo.range_ptr = compat_ptr(rangeinfo32.range_ptr); mutex_lock(&dev->mutex); err = do_rangeinfo_ioctl(dev, &rangeinfo); mutex_unlock(&dev->mutex); return err; } /* Copy 32-bit cmd structure to native cmd structure. 
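 * The chanlist and data members are 32-bit user pointers
 * (compat_uptr_t) converted with compat_ptr(); the remaining members
 * are plain integers copied field by field.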
*/ static int get_compat_cmd(struct comedi_cmd *cmd, struct comedi32_cmd_struct __user *cmd32) { struct comedi32_cmd_struct v32; if (copy_from_user(&v32, cmd32, sizeof(v32))) return -EFAULT; cmd->subdev = v32.subdev; cmd->flags = v32.flags; cmd->start_src = v32.start_src; cmd->start_arg = v32.start_arg; cmd->scan_begin_src = v32.scan_begin_src; cmd->scan_begin_arg = v32.scan_begin_arg; cmd->convert_src = v32.convert_src; cmd->convert_arg = v32.convert_arg; cmd->scan_end_src = v32.scan_end_src; cmd->scan_end_arg = v32.scan_end_arg; cmd->stop_src = v32.stop_src; cmd->stop_arg = v32.stop_arg; cmd->chanlist = (unsigned int __force *)compat_ptr(v32.chanlist); cmd->chanlist_len = v32.chanlist_len; cmd->data = compat_ptr(v32.data); cmd->data_len = v32.data_len; return 0; } /* Copy native cmd structure to 32-bit cmd structure. */ static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32, struct comedi_cmd *cmd) { struct comedi32_cmd_struct v32; memset(&v32, 0, sizeof(v32)); v32.subdev = cmd->subdev; v32.flags = cmd->flags; v32.start_src = cmd->start_src; v32.start_arg = cmd->start_arg; v32.scan_begin_src = cmd->scan_begin_src; v32.scan_begin_arg = cmd->scan_begin_arg; v32.convert_src = cmd->convert_src; v32.convert_arg = cmd->convert_arg; v32.scan_end_src = cmd->scan_end_src; v32.scan_end_arg = cmd->scan_end_arg; v32.stop_src = cmd->stop_src; v32.stop_arg = cmd->stop_arg; /* Assume chanlist pointer is unchanged. */ v32.chanlist = ptr_to_compat((unsigned int __user *)cmd->chanlist); v32.chanlist_len = cmd->chanlist_len; v32.data = ptr_to_compat(cmd->data); v32.data_len = cmd->data_len; if (copy_to_user(cmd32, &v32, sizeof(v32))) return -EFAULT; return 0; } /* Handle 32-bit COMEDI_CMD ioctl. */ static int compat_cmd(struct file *file, unsigned long arg) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_cmd cmd; bool copy = false; int rc, err; rc = get_compat_cmd(&cmd, compat_ptr(arg)); if (rc) return rc; mutex_lock(&dev->mutex); rc = do_cmd_ioctl(dev, &cmd, &copy, file); mutex_unlock(&dev->mutex); if (copy) { /* Special case: copy cmd back to user. */ err = put_compat_cmd(compat_ptr(arg), &cmd); if (err) rc = err; } return rc; } /* Handle 32-bit COMEDI_CMDTEST ioctl. */ static int compat_cmdtest(struct file *file, unsigned long arg) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_cmd cmd; bool copy = false; int rc, err; rc = get_compat_cmd(&cmd, compat_ptr(arg)); if (rc) return rc; mutex_lock(&dev->mutex); rc = do_cmdtest_ioctl(dev, &cmd, &copy, file); mutex_unlock(&dev->mutex); if (copy) { err = put_compat_cmd(compat_ptr(arg), &cmd); if (err) rc = err; } return rc; } /* Copy 32-bit insn structure to native insn structure. */ static int get_compat_insn(struct comedi_insn *insn, struct comedi32_insn_struct __user *insn32) { struct comedi32_insn_struct v32; /* Copy insn structure. Ignore the unused members. */ if (copy_from_user(&v32, insn32, sizeof(v32))) return -EFAULT; memset(insn, 0, sizeof(*insn)); insn->insn = v32.insn; insn->n = v32.n; insn->data = compat_ptr(v32.data); insn->subdev = v32.subdev; insn->chanspec = v32.chanspec; return 0; } /* Handle 32-bit COMEDI_INSNLIST ioctl. 
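 * (The user argument holds n_insns and a compat_uptr_t pointing to an array
 * of struct comedi32_insn_struct; each element is converted with
 * get_compat_insn() before the whole list is handed to do_insnlist_ioctl().)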
*/ static int compat_insnlist(struct file *file, unsigned long arg) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi32_insnlist_struct insnlist32; struct comedi32_insn_struct __user *insn32; struct comedi_insn *insns; unsigned int n; int rc; if (copy_from_user(&insnlist32, compat_ptr(arg), sizeof(insnlist32))) return -EFAULT; insns = kcalloc(insnlist32.n_insns, sizeof(*insns), GFP_KERNEL); if (!insns) return -ENOMEM; /* Copy insn structures. */ insn32 = compat_ptr(insnlist32.insns); for (n = 0; n < insnlist32.n_insns; n++) { rc = get_compat_insn(insns + n, insn32 + n); if (rc) { kfree(insns); return rc; } } mutex_lock(&dev->mutex); rc = do_insnlist_ioctl(dev, insns, insnlist32.n_insns, file); mutex_unlock(&dev->mutex); kfree(insns); return rc; } /* Handle 32-bit COMEDI_INSN ioctl. */ static int compat_insn(struct file *file, unsigned long arg) { struct comedi_file *cfp = file->private_data; struct comedi_device *dev = cfp->dev; struct comedi_insn insn; int rc; rc = get_compat_insn(&insn, (void __user *)arg); if (rc) return rc; mutex_lock(&dev->mutex); rc = do_insn_ioctl(dev, &insn, file); mutex_unlock(&dev->mutex); return rc; } /* * compat_ioctl file operation. * * Returns -ENOIOCTLCMD for unrecognised ioctl codes. */ static long comedi_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int rc; switch (cmd) { case COMEDI_DEVCONFIG: case COMEDI_DEVINFO: case COMEDI_SUBDINFO: case COMEDI_BUFCONFIG: case COMEDI_BUFINFO: /* Just need to translate the pointer argument. */ arg = (unsigned long)compat_ptr(arg); rc = comedi_unlocked_ioctl(file, cmd, arg); break; case COMEDI_LOCK: case COMEDI_UNLOCK: case COMEDI_CANCEL: case COMEDI_POLL: case COMEDI_SETRSUBD: case COMEDI_SETWSUBD: /* No translation needed. */ rc = comedi_unlocked_ioctl(file, cmd, arg); break; case COMEDI32_CHANINFO: rc = compat_chaninfo(file, arg); break; case COMEDI32_RANGEINFO: rc = compat_rangeinfo(file, arg); break; case COMEDI32_CMD: rc = compat_cmd(file, arg); break; case COMEDI32_CMDTEST: rc = compat_cmdtest(file, arg); break; case COMEDI32_INSNLIST: rc = compat_insnlist(file, arg); break; case COMEDI32_INSN: rc = compat_insn(file, arg); break; default: rc = -ENOIOCTLCMD; break; } return rc; } #else #define comedi_compat_ioctl NULL #endif static const struct file_operations comedi_fops = { .owner = THIS_MODULE, .unlocked_ioctl = comedi_unlocked_ioctl, .compat_ioctl = comedi_compat_ioctl, .open = comedi_open, .release = comedi_close, .read = comedi_read, .write = comedi_write, .mmap = comedi_mmap, .poll = comedi_poll, .fasync = comedi_fasync, .llseek = noop_llseek, }; /** * comedi_event() - Handle events for asynchronous COMEDI command * @dev: COMEDI device. * @s: COMEDI subdevice. * Context: in_interrupt() (usually), @s->spin_lock spin-lock not held. * * If an asynchronous COMEDI command is active on the subdevice, process * any %COMEDI_CB_... event flags that have been set, usually by an * interrupt handler. These may change the run state of the asynchronous * command, wake a task, and/or send a %SIGIO signal. 
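 *
 * A board driver's interrupt handler typically sets the event flags and then
 * calls this function. Illustrative sketch only (the handler name and the
 * buffer handling are hypothetical):
 *
 *	static irqreturn_t my_board_interrupt(int irq, void *d)
 *	{
 *		struct comedi_device *dev = d;
 *		struct comedi_subdevice *s = dev->read_subdev;
 *
 *		... copy new samples into the s->async buffer ...
 *		s->async->events |= COMEDI_CB_BLOCK;
 *		comedi_event(dev, s);
 *		return IRQ_HANDLED;
 *	}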
*/ void comedi_event(struct comedi_device *dev, struct comedi_subdevice *s) { struct comedi_async *async = s->async; unsigned int events; int si_code = 0; unsigned long flags; spin_lock_irqsave(&s->spin_lock, flags); events = async->events; async->events = 0; if (!__comedi_is_subdevice_running(s)) { spin_unlock_irqrestore(&s->spin_lock, flags); return; } if (events & COMEDI_CB_CANCEL_MASK) __comedi_clear_subdevice_runflags(s, COMEDI_SRF_RUNNING); /* * Remember if an error event has occurred, so an error can be * returned the next time the user does a read() or write(). */ if (events & COMEDI_CB_ERROR_MASK) __comedi_set_subdevice_runflags(s, COMEDI_SRF_ERROR); if (async->cb_mask & events) { wake_up_interruptible(&async->wait_head); si_code = async->cmd.flags & CMDF_WRITE ? POLL_OUT : POLL_IN; } spin_unlock_irqrestore(&s->spin_lock, flags); if (si_code) kill_fasync(&dev->async_queue, SIGIO, si_code); } EXPORT_SYMBOL_GPL(comedi_event); /* Note: the ->mutex is pre-locked on successful return */ struct comedi_device *comedi_alloc_board_minor(struct device *hardware_device) { struct comedi_device *dev; struct device *csdev; unsigned int i; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); comedi_device_init(dev); comedi_set_hw_dev(dev, hardware_device); mutex_lock(&dev->mutex); mutex_lock(&comedi_board_minor_table_lock); for (i = hardware_device ? comedi_num_legacy_minors : 0; i < COMEDI_NUM_BOARD_MINORS; ++i) { if (!comedi_board_minor_table[i]) { comedi_board_minor_table[i] = dev; break; } } mutex_unlock(&comedi_board_minor_table_lock); if (i == COMEDI_NUM_BOARD_MINORS) { mutex_unlock(&dev->mutex); comedi_device_cleanup(dev); comedi_dev_put(dev); dev_err(hardware_device, "ran out of minor numbers for board device files\n"); return ERR_PTR(-EBUSY); } dev->minor = i; csdev = device_create(&comedi_class, hardware_device, MKDEV(COMEDI_MAJOR, i), NULL, "comedi%i", i); if (!IS_ERR(csdev)) dev->class_dev = get_device(csdev); /* Note: dev->mutex needs to be unlocked by the caller. 
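 * A typical caller therefore does, roughly (illustrative sketch):
 *
 *	dev = comedi_alloc_board_minor(hw_dev);
 *	if (IS_ERR(dev))
 *		return PTR_ERR(dev);
 *	... set up the device ...
 *	mutex_unlock(&dev->mutex);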
*/ return dev; } void comedi_release_hardware_device(struct device *hardware_device) { int minor; struct comedi_device *dev; for (minor = comedi_num_legacy_minors; minor < COMEDI_NUM_BOARD_MINORS; minor++) { mutex_lock(&comedi_board_minor_table_lock); dev = comedi_board_minor_table[minor]; if (dev && dev->hw_dev == hardware_device) { comedi_board_minor_table[minor] = NULL; mutex_unlock(&comedi_board_minor_table_lock); comedi_free_board_dev(dev); break; } mutex_unlock(&comedi_board_minor_table_lock); } } int comedi_alloc_subdevice_minor(struct comedi_subdevice *s) { struct comedi_device *dev = s->device; struct device *csdev; unsigned int i; mutex_lock(&comedi_subdevice_minor_table_lock); for (i = 0; i < COMEDI_NUM_SUBDEVICE_MINORS; ++i) { if (!comedi_subdevice_minor_table[i]) { comedi_subdevice_minor_table[i] = s; break; } } mutex_unlock(&comedi_subdevice_minor_table_lock); if (i == COMEDI_NUM_SUBDEVICE_MINORS) { dev_err(dev->class_dev, "ran out of minor numbers for subdevice files\n"); return -EBUSY; } i += COMEDI_NUM_BOARD_MINORS; s->minor = i; csdev = device_create(&comedi_class, dev->class_dev, MKDEV(COMEDI_MAJOR, i), NULL, "comedi%i_subd%i", dev->minor, s->index); if (!IS_ERR(csdev)) s->class_dev = csdev; return 0; } void comedi_free_subdevice_minor(struct comedi_subdevice *s) { unsigned int i; if (!s) return; if (s->minor < COMEDI_NUM_BOARD_MINORS || s->minor >= COMEDI_NUM_MINORS) return; i = s->minor - COMEDI_NUM_BOARD_MINORS; mutex_lock(&comedi_subdevice_minor_table_lock); if (s == comedi_subdevice_minor_table[i]) comedi_subdevice_minor_table[i] = NULL; mutex_unlock(&comedi_subdevice_minor_table_lock); if (s->class_dev) { device_destroy(&comedi_class, MKDEV(COMEDI_MAJOR, s->minor)); s->class_dev = NULL; } } static void comedi_cleanup_board_minors(void) { struct comedi_device *dev; unsigned int i; for (i = 0; i < COMEDI_NUM_BOARD_MINORS; i++) { dev = comedi_clear_board_minor(i); comedi_free_board_dev(dev); } } static int __init comedi_init(void) { int i; int retval; pr_info("version " COMEDI_RELEASE " - http://www.comedi.org\n"); if (comedi_num_legacy_minors > COMEDI_NUM_BOARD_MINORS) { pr_err("invalid value for module parameter \"comedi_num_legacy_minors\". 
Valid values are 0 through %i.\n", COMEDI_NUM_BOARD_MINORS); return -EINVAL; } retval = register_chrdev_region(MKDEV(COMEDI_MAJOR, 0), COMEDI_NUM_MINORS, "comedi"); if (retval) return retval; cdev_init(&comedi_cdev, &comedi_fops); comedi_cdev.owner = THIS_MODULE; retval = kobject_set_name(&comedi_cdev.kobj, "comedi"); if (retval) goto out_unregister_chrdev_region; retval = cdev_add(&comedi_cdev, MKDEV(COMEDI_MAJOR, 0), COMEDI_NUM_MINORS); if (retval) goto out_unregister_chrdev_region; retval = class_register(&comedi_class); if (retval) { pr_err("failed to create class\n"); goto out_cdev_del; } /* create devices files for legacy/manual use */ for (i = 0; i < comedi_num_legacy_minors; i++) { struct comedi_device *dev; dev = comedi_alloc_board_minor(NULL); if (IS_ERR(dev)) { retval = PTR_ERR(dev); goto out_cleanup_board_minors; } /* comedi_alloc_board_minor() locked the mutex */ lockdep_assert_held(&dev->mutex); mutex_unlock(&dev->mutex); } /* XXX requires /proc interface */ comedi_proc_init(); return 0; out_cleanup_board_minors: comedi_cleanup_board_minors(); class_unregister(&comedi_class); out_cdev_del: cdev_del(&comedi_cdev); out_unregister_chrdev_region: unregister_chrdev_region(MKDEV(COMEDI_MAJOR, 0), COMEDI_NUM_MINORS); return retval; } module_init(comedi_init); static void __exit comedi_cleanup(void) { comedi_cleanup_board_minors(); class_unregister(&comedi_class); cdev_del(&comedi_cdev); unregister_chrdev_region(MKDEV(COMEDI_MAJOR, 0), COMEDI_NUM_MINORS); comedi_proc_cleanup(); } module_exit(comedi_cleanup); MODULE_AUTHOR("https://www.comedi.org"); MODULE_DESCRIPTION("Comedi core module"); MODULE_LICENSE("GPL"); |
/* * usbmidi.c - ALSA USB MIDI driver * * Copyright (c) 2002-2009 Clemens Ladisch * All rights reserved. * * Based on the OSS usb-midi driver by NAGANO Daisuke, * NetBSD's umidi driver by Takuya SHIOZAKI, * the "USB Device Class Definition for MIDI Devices" by Roland * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, this software may be distributed and/or modified under the * terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any later * version. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include <linux/kernel.h> #include <linux/types.h> #include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/usb.h> #include <linux/wait.h> #include <linux/usb/audio.h> #include <linux/usb/midi.h> #include <linux/module.h> #include <sound/core.h> #include <sound/control.h> #include <sound/rawmidi.h> #include <sound/asequencer.h> #include "usbaudio.h" #include "midi.h" #include "power.h" #include "helper.h" /* * define this to log all USB packets */ /* #define DUMP_PACKETS */ /* * how long to wait after some USB errors, so that hub_wq can disconnect() us * without too many spurious errors */ #define ERROR_DELAY_JIFFIES (HZ / 10) #define OUTPUT_URBS 7 #define INPUT_URBS 7 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>"); MODULE_DESCRIPTION("USB Audio/MIDI helper module"); MODULE_LICENSE("Dual BSD/GPL"); struct snd_usb_midi_in_endpoint; struct snd_usb_midi_out_endpoint; struct snd_usb_midi_endpoint; struct usb_protocol_ops { void (*input)(struct snd_usb_midi_in_endpoint*, uint8_t*, int); void (*output)(struct snd_usb_midi_out_endpoint *ep, struct urb *urb); void (*output_packet)(struct urb*, uint8_t, uint8_t, uint8_t, uint8_t); void (*init_out_endpoint)(struct snd_usb_midi_out_endpoint *); void (*finish_out_endpoint)(struct snd_usb_midi_out_endpoint *); }; struct snd_usb_midi { struct usb_device *dev; struct snd_card *card; struct usb_interface *iface; const struct snd_usb_audio_quirk *quirk; struct snd_rawmidi *rmidi; const struct usb_protocol_ops *usb_protocol_ops; struct list_head list; struct timer_list error_timer; spinlock_t disc_lock; struct rw_semaphore disc_rwsem; struct mutex mutex; u32 usb_id; int next_midi_device; struct snd_usb_midi_endpoint { struct snd_usb_midi_out_endpoint *out; struct snd_usb_midi_in_endpoint *in; } endpoints[MIDI_MAX_ENDPOINTS]; unsigned long input_triggered; unsigned int opened[2]; unsigned char disconnected; unsigned char input_running; struct snd_kcontrol *roland_load_ctl; }; struct snd_usb_midi_out_endpoint { struct snd_usb_midi *umidi; struct out_urb_context { struct urb *urb; struct snd_usb_midi_out_endpoint *ep; } urbs[OUTPUT_URBS]; unsigned int active_urbs; unsigned int drain_urbs; int max_transfer; /* size of urb buffer */ struct work_struct work; unsigned int next_urb; spinlock_t buffer_lock; struct usbmidi_out_port { struct snd_usb_midi_out_endpoint *ep; struct snd_rawmidi_substream *substream; int active; uint8_t cable; /* cable number << 4 */ uint8_t state; #define STATE_UNKNOWN 0 #define STATE_1PARAM 1 #define STATE_2PARAM_1 2 #define STATE_2PARAM_2 3 #define STATE_SYSEX_0 4 #define STATE_SYSEX_1 5 #define STATE_SYSEX_2 6 uint8_t data[2]; } ports[0x10]; int current_port; wait_queue_head_t drain_wait; }; struct snd_usb_midi_in_endpoint { struct snd_usb_midi *umidi; struct urb *urbs[INPUT_URBS]; struct usbmidi_in_port { struct snd_rawmidi_substream *substream; u8 running_status_length; } ports[0x10]; u8 seen_f5; bool in_sysex; u8 last_cin; u8 error_resubmit; int current_port; }; static void snd_usbmidi_do_output(struct snd_usb_midi_out_endpoint *ep); static const uint8_t snd_usbmidi_cin_length[] = { 0, 0, 2, 3, 3, 1, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1 }; /* * Submits the URB, with error handling. 
*/ static int snd_usbmidi_submit_urb(struct urb *urb, gfp_t flags) { int err = usb_submit_urb(urb, flags); if (err < 0 && err != -ENODEV) dev_err(&urb->dev->dev, "usb_submit_urb: %d\n", err); return err; } /* * Error handling for URB completion functions. */ static int snd_usbmidi_urb_error(const struct urb *urb) { switch (urb->status) { /* manually unlinked, or device gone */ case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: case -ENODEV: return -ENODEV; /* errors that might occur during unplugging */ case -EPROTO: case -ETIME: case -EILSEQ: return -EIO; default: dev_err(&urb->dev->dev, "urb status %d\n", urb->status); return 0; /* continue */ } } /* * Receives a chunk of MIDI data. */ static void snd_usbmidi_input_data(struct snd_usb_midi_in_endpoint *ep, int portidx, uint8_t *data, int length) { struct usbmidi_in_port *port = &ep->ports[portidx]; if (!port->substream) { dev_dbg(&ep->umidi->dev->dev, "unexpected port %d!\n", portidx); return; } if (!test_bit(port->substream->number, &ep->umidi->input_triggered)) return; snd_rawmidi_receive(port->substream, data, length); } #ifdef DUMP_PACKETS static void dump_urb(const char *type, const u8 *data, int length) { pr_debug("%s packet: [", type); for (; length > 0; ++data, --length) pr_cont(" %02x", *data); pr_cont(" ]\n"); } #else #define dump_urb(type, data, length) /* nothing */ #endif /* * Processes the data read from the device. */ static void snd_usbmidi_in_urb_complete(struct urb *urb) { struct snd_usb_midi_in_endpoint *ep = urb->context; if (urb->status == 0) { dump_urb("received", urb->transfer_buffer, urb->actual_length); ep->umidi->usb_protocol_ops->input(ep, urb->transfer_buffer, urb->actual_length); } else { int err = snd_usbmidi_urb_error(urb); if (err < 0) { if (err != -ENODEV) { ep->error_resubmit = 1; mod_timer(&ep->umidi->error_timer, jiffies + ERROR_DELAY_JIFFIES); } return; } } urb->dev = ep->umidi->dev; snd_usbmidi_submit_urb(urb, GFP_ATOMIC); } static void snd_usbmidi_out_urb_complete(struct urb *urb) { struct out_urb_context *context = urb->context; struct snd_usb_midi_out_endpoint *ep = context->ep; unsigned int urb_index; unsigned long flags; spin_lock_irqsave(&ep->buffer_lock, flags); urb_index = context - ep->urbs; ep->active_urbs &= ~(1 << urb_index); if (unlikely(ep->drain_urbs)) { ep->drain_urbs &= ~(1 << urb_index); wake_up(&ep->drain_wait); } spin_unlock_irqrestore(&ep->buffer_lock, flags); if (urb->status < 0) { int err = snd_usbmidi_urb_error(urb); if (err < 0) { if (err != -ENODEV) mod_timer(&ep->umidi->error_timer, jiffies + ERROR_DELAY_JIFFIES); return; } } snd_usbmidi_do_output(ep); } /* * This is called when some data should be transferred to the device * (from one or more substreams). 
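 * The endpoint's output() protocol op fills urb->transfer_buffer and sets
 * urb->transfer_buffer_length; a resulting length of zero means there is
 * nothing more to send and ends the scan over the URBs below.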
*/ static void snd_usbmidi_do_output(struct snd_usb_midi_out_endpoint *ep) { unsigned int urb_index; struct urb *urb; unsigned long flags; spin_lock_irqsave(&ep->buffer_lock, flags); if (ep->umidi->disconnected) { spin_unlock_irqrestore(&ep->buffer_lock, flags); return; } urb_index = ep->next_urb; for (;;) { if (!(ep->active_urbs & (1 << urb_index))) { urb = ep->urbs[urb_index].urb; urb->transfer_buffer_length = 0; ep->umidi->usb_protocol_ops->output(ep, urb); if (urb->transfer_buffer_length == 0) break; dump_urb("sending", urb->transfer_buffer, urb->transfer_buffer_length); urb->dev = ep->umidi->dev; if (snd_usbmidi_submit_urb(urb, GFP_ATOMIC) < 0) break; ep->active_urbs |= 1 << urb_index; } if (++urb_index >= OUTPUT_URBS) urb_index = 0; if (urb_index == ep->next_urb) break; } ep->next_urb = urb_index; spin_unlock_irqrestore(&ep->buffer_lock, flags); } static void snd_usbmidi_out_work(struct work_struct *work) { struct snd_usb_midi_out_endpoint *ep = container_of(work, struct snd_usb_midi_out_endpoint, work); snd_usbmidi_do_output(ep); } /* called after transfers have been interrupted due to some USB error */ static void snd_usbmidi_error_timer(struct timer_list *t) { struct snd_usb_midi *umidi = from_timer(umidi, t, error_timer); unsigned int i, j; spin_lock(&umidi->disc_lock); if (umidi->disconnected) { spin_unlock(&umidi->disc_lock); return; } for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_in_endpoint *in = umidi->endpoints[i].in; if (in && in->error_resubmit) { in->error_resubmit = 0; for (j = 0; j < INPUT_URBS; ++j) { if (atomic_read(&in->urbs[j]->use_count)) continue; in->urbs[j]->dev = umidi->dev; snd_usbmidi_submit_urb(in->urbs[j], GFP_ATOMIC); } } if (umidi->endpoints[i].out) snd_usbmidi_do_output(umidi->endpoints[i].out); } spin_unlock(&umidi->disc_lock); } /* helper function to send static data that may not be DMA-able */ static int send_bulk_static_data(struct snd_usb_midi_out_endpoint *ep, const void *data, int len) { int err = 0; void *buf = kmemdup(data, len, GFP_KERNEL); if (!buf) return -ENOMEM; dump_urb("sending", buf, len); if (ep->urbs[0].urb) err = usb_bulk_msg(ep->umidi->dev, ep->urbs[0].urb->pipe, buf, len, NULL, 250); kfree(buf); return err; } /* * Standard USB MIDI protocol: see the spec. * Midiman protocol: like the standard protocol, but the control byte is the * fourth byte in each packet, and uses length instead of CIN. */ static void snd_usbmidi_standard_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; for (i = 0; i + 3 < buffer_length; i += 4) if (buffer[i] != 0) { int cable = buffer[i] >> 4; int length = snd_usbmidi_cin_length[buffer[i] & 0x0f]; snd_usbmidi_input_data(ep, cable, &buffer[i + 1], length); } } static void snd_usbmidi_midiman_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; for (i = 0; i + 3 < buffer_length; i += 4) if (buffer[i + 3] != 0) { int port = buffer[i + 3] >> 4; int length = buffer[i + 3] & 3; snd_usbmidi_input_data(ep, port, &buffer[i], length); } } /* * Buggy M-Audio device: running status on input results in a packet that has * the data bytes but not the status byte and that is marked with CIN 4. 
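 * For example (illustrative byte values): after a normal Note On packet
 * 09 90 3C 7F, a running-status Note On may arrive as 04 3E 7F 00, i.e.
 * CIN 4 with data bytes only; the handler below then takes the length from
 * the last seen channel message rather than from the CIN.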
*/ static void snd_usbmidi_maudio_broken_running_status_input( struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; for (i = 0; i + 3 < buffer_length; i += 4) if (buffer[i] != 0) { int cable = buffer[i] >> 4; u8 cin = buffer[i] & 0x0f; struct usbmidi_in_port *port = &ep->ports[cable]; int length; length = snd_usbmidi_cin_length[cin]; if (cin == 0xf && buffer[i + 1] >= 0xf8) ; /* realtime msg: no running status change */ else if (cin >= 0x8 && cin <= 0xe) /* channel msg */ port->running_status_length = length - 1; else if (cin == 0x4 && port->running_status_length != 0 && buffer[i + 1] < 0x80) /* CIN 4 that is not a SysEx */ length = port->running_status_length; else /* * All other msgs cannot begin running status. * (A channel msg sent as two or three CIN 0xF * packets could in theory, but this device * doesn't use this format.) */ port->running_status_length = 0; snd_usbmidi_input_data(ep, cable, &buffer[i + 1], length); } } /* * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4 * but the previously seen CIN, but still with three data bytes. */ static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { unsigned int i, cin, length; for (i = 0; i + 3 < buffer_length; i += 4) { if (buffer[i] == 0 && i > 0) break; cin = buffer[i] & 0x0f; if (ep->in_sysex && cin == ep->last_cin && (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0) cin = 0x4; #if 0 if (buffer[i + 1] == 0x90) { /* * Either a corrupted running status or a real note-on * message; impossible to detect reliably. */ } #endif length = snd_usbmidi_cin_length[cin]; snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length); ep->in_sysex = cin == 0x4; if (!ep->in_sysex) ep->last_cin = cin; } } /* * CME protocol: like the standard protocol, but SysEx commands are sent as a * single USB packet preceded by a 0x0F byte. */ static void snd_usbmidi_cme_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length < 2 || (buffer[0] & 0x0f) != 0x0f) snd_usbmidi_standard_input(ep, buffer, buffer_length); else snd_usbmidi_input_data(ep, buffer[0] >> 4, &buffer[1], buffer_length - 1); } /* * Adds one USB MIDI packet to the output buffer. */ static void snd_usbmidi_output_standard_packet(struct urb *urb, uint8_t p0, uint8_t p1, uint8_t p2, uint8_t p3) { uint8_t *buf = (uint8_t *)urb->transfer_buffer + urb->transfer_buffer_length; buf[0] = p0; buf[1] = p1; buf[2] = p2; buf[3] = p3; urb->transfer_buffer_length += 4; } /* * Adds one Midiman packet to the output buffer. */ static void snd_usbmidi_output_midiman_packet(struct urb *urb, uint8_t p0, uint8_t p1, uint8_t p2, uint8_t p3) { uint8_t *buf = (uint8_t *)urb->transfer_buffer + urb->transfer_buffer_length; buf[0] = p1; buf[1] = p2; buf[2] = p3; buf[3] = (p0 & 0xf0) | snd_usbmidi_cin_length[p0 & 0x0f]; urb->transfer_buffer_length += 4; } /* * Converts MIDI commands to USB MIDI packets. 
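 * For example (illustrative): a Note On 90 3C 7F on virtual cable 0 becomes
 * the four-byte USB MIDI event packet 09 90 3C 7F, where the first byte
 * holds the cable number in the high nibble and the code index number
 * (here 0x9, taken from the status nibble) in the low nibble.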
*/ static void snd_usbmidi_transmit_byte(struct usbmidi_out_port *port, uint8_t b, struct urb *urb) { uint8_t p0 = port->cable; void (*output_packet)(struct urb*, uint8_t, uint8_t, uint8_t, uint8_t) = port->ep->umidi->usb_protocol_ops->output_packet; if (b >= 0xf8) { output_packet(urb, p0 | 0x0f, b, 0, 0); } else if (b >= 0xf0) { switch (b) { case 0xf0: port->data[0] = b; port->state = STATE_SYSEX_1; break; case 0xf1: case 0xf3: port->data[0] = b; port->state = STATE_1PARAM; break; case 0xf2: port->data[0] = b; port->state = STATE_2PARAM_1; break; case 0xf4: case 0xf5: port->state = STATE_UNKNOWN; break; case 0xf6: output_packet(urb, p0 | 0x05, 0xf6, 0, 0); port->state = STATE_UNKNOWN; break; case 0xf7: switch (port->state) { case STATE_SYSEX_0: output_packet(urb, p0 | 0x05, 0xf7, 0, 0); break; case STATE_SYSEX_1: output_packet(urb, p0 | 0x06, port->data[0], 0xf7, 0); break; case STATE_SYSEX_2: output_packet(urb, p0 | 0x07, port->data[0], port->data[1], 0xf7); break; } port->state = STATE_UNKNOWN; break; } } else if (b >= 0x80) { port->data[0] = b; if (b >= 0xc0 && b <= 0xdf) port->state = STATE_1PARAM; else port->state = STATE_2PARAM_1; } else { /* b < 0x80 */ switch (port->state) { case STATE_1PARAM: if (port->data[0] < 0xf0) { p0 |= port->data[0] >> 4; } else { p0 |= 0x02; port->state = STATE_UNKNOWN; } output_packet(urb, p0, port->data[0], b, 0); break; case STATE_2PARAM_1: port->data[1] = b; port->state = STATE_2PARAM_2; break; case STATE_2PARAM_2: if (port->data[0] < 0xf0) { p0 |= port->data[0] >> 4; port->state = STATE_2PARAM_1; } else { p0 |= 0x03; port->state = STATE_UNKNOWN; } output_packet(urb, p0, port->data[0], port->data[1], b); break; case STATE_SYSEX_0: port->data[0] = b; port->state = STATE_SYSEX_1; break; case STATE_SYSEX_1: port->data[1] = b; port->state = STATE_SYSEX_2; break; case STATE_SYSEX_2: output_packet(urb, p0 | 0x04, port->data[0], port->data[1], b); port->state = STATE_SYSEX_0; break; } } } static void snd_usbmidi_standard_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int p; /* FIXME: lower-numbered ports can starve higher-numbered ports */ for (p = 0; p < 0x10; ++p) { struct usbmidi_out_port *port = &ep->ports[p]; if (!port->active) continue; while (urb->transfer_buffer_length + 3 < ep->max_transfer) { uint8_t b; if (snd_rawmidi_transmit(port->substream, &b, 1) != 1) { port->active = 0; break; } snd_usbmidi_transmit_byte(port, b, urb); } } } static const struct usb_protocol_ops snd_usbmidi_standard_ops = { .input = snd_usbmidi_standard_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; static const struct usb_protocol_ops snd_usbmidi_midiman_ops = { .input = snd_usbmidi_midiman_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_midiman_packet, }; static const struct usb_protocol_ops snd_usbmidi_maudio_broken_running_status_ops = { .input = snd_usbmidi_maudio_broken_running_status_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; static const struct usb_protocol_ops snd_usbmidi_cme_ops = { .input = snd_usbmidi_cme_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; static const struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = { .input = ch345_broken_sysex_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; /* * AKAI MPD16 protocol: * * For control port (endpoint 1): * ============================== * 
One or more chunks consisting of first byte of (0x10 | msg_len) and then a * SysEx message (msg_len=9 bytes long). * * For data port (endpoint 2): * =========================== * One or more chunks consisting of first byte of (0x20 | msg_len) and then a * MIDI message (msg_len bytes long) * * Messages sent: Active Sense, Note On, Poly Pressure, Control Change. */ static void snd_usbmidi_akai_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { unsigned int pos = 0; unsigned int len = (unsigned int)buffer_length; while (pos < len) { unsigned int port = (buffer[pos] >> 4) - 1; unsigned int msg_len = buffer[pos] & 0x0f; pos++; if (pos + msg_len <= len && port < 2) snd_usbmidi_input_data(ep, 0, &buffer[pos], msg_len); pos += msg_len; } } #define MAX_AKAI_SYSEX_LEN 9 static void snd_usbmidi_akai_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { uint8_t *msg; int pos, end, count, buf_end; uint8_t tmp[MAX_AKAI_SYSEX_LEN]; struct snd_rawmidi_substream *substream = ep->ports[0].substream; if (!ep->ports[0].active) return; msg = urb->transfer_buffer + urb->transfer_buffer_length; buf_end = ep->max_transfer - MAX_AKAI_SYSEX_LEN - 1; /* only try adding more data when there's space for at least 1 SysEx */ while (urb->transfer_buffer_length < buf_end) { count = snd_rawmidi_transmit_peek(substream, tmp, MAX_AKAI_SYSEX_LEN); if (!count) { ep->ports[0].active = 0; return; } /* try to skip non-SysEx data */ for (pos = 0; pos < count && tmp[pos] != 0xF0; pos++) ; if (pos > 0) { snd_rawmidi_transmit_ack(substream, pos); continue; } /* look for the start or end marker */ for (end = 1; end < count && tmp[end] < 0xF0; end++) ; /* next SysEx started before the end of current one */ if (end < count && tmp[end] == 0xF0) { /* it's incomplete - drop it */ snd_rawmidi_transmit_ack(substream, end); continue; } /* SysEx complete */ if (end < count && tmp[end] == 0xF7) { /* queue it, ack it, and get the next one */ count = end + 1; msg[0] = 0x10 | count; memcpy(&msg[1], tmp, count); snd_rawmidi_transmit_ack(substream, count); urb->transfer_buffer_length += count + 1; msg += count + 1; continue; } /* less than 9 bytes and no end byte - wait for more */ if (count < MAX_AKAI_SYSEX_LEN) { ep->ports[0].active = 0; return; } /* 9 bytes and no end marker in sight - malformed, skip it */ snd_rawmidi_transmit_ack(substream, count); } } static const struct usb_protocol_ops snd_usbmidi_akai_ops = { .input = snd_usbmidi_akai_input, .output = snd_usbmidi_akai_output, }; /* * Novation USB MIDI protocol: number of data bytes is in the first byte * (when receiving) (+1!) or in the second byte (when sending); data begins * at the third byte. 
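 * For example (illustrative): a received buffer 04 00 90 3C 7F carries
 * 4 - 1 = 3 MIDI bytes starting at offset 2 (the Note On 90 3C 7F); the
 * same message is sent to the device as 00 03 90 3C 7F.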
*/ static void snd_usbmidi_novation_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length < 2 || !buffer[0] || buffer_length < buffer[0] + 1) return; snd_usbmidi_input_data(ep, 0, &buffer[2], buffer[0] - 1); } static void snd_usbmidi_novation_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { uint8_t *transfer_buffer; int count; if (!ep->ports[0].active) return; transfer_buffer = urb->transfer_buffer; count = snd_rawmidi_transmit(ep->ports[0].substream, &transfer_buffer[2], ep->max_transfer - 2); if (count < 1) { ep->ports[0].active = 0; return; } transfer_buffer[0] = 0; transfer_buffer[1] = count; urb->transfer_buffer_length = 2 + count; } static const struct usb_protocol_ops snd_usbmidi_novation_ops = { .input = snd_usbmidi_novation_input, .output = snd_usbmidi_novation_output, }; /* * "raw" protocol: just move raw MIDI bytes from/to the endpoint */ static void snd_usbmidi_raw_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { snd_usbmidi_input_data(ep, 0, buffer, buffer_length); } static void snd_usbmidi_raw_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int count; if (!ep->ports[0].active) return; count = snd_rawmidi_transmit(ep->ports[0].substream, urb->transfer_buffer, ep->max_transfer); if (count < 1) { ep->ports[0].active = 0; return; } urb->transfer_buffer_length = count; } static const struct usb_protocol_ops snd_usbmidi_raw_ops = { .input = snd_usbmidi_raw_input, .output = snd_usbmidi_raw_output, }; /* * FTDI protocol: raw MIDI bytes, but input packets have two modem status bytes. */ static void snd_usbmidi_ftdi_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length > 2) snd_usbmidi_input_data(ep, 0, buffer + 2, buffer_length - 2); } static const struct usb_protocol_ops snd_usbmidi_ftdi_ops = { .input = snd_usbmidi_ftdi_input, .output = snd_usbmidi_raw_output, }; static void snd_usbmidi_us122l_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length != 9) return; buffer_length = 8; while (buffer_length && buffer[buffer_length - 1] == 0xFD) buffer_length--; if (buffer_length) snd_usbmidi_input_data(ep, 0, buffer, buffer_length); } static void snd_usbmidi_us122l_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int count; if (!ep->ports[0].active) return; switch (snd_usb_get_speed(ep->umidi->dev)) { case USB_SPEED_HIGH: case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: count = 1; break; default: count = 2; } count = snd_rawmidi_transmit(ep->ports[0].substream, urb->transfer_buffer, count); if (count < 1) { ep->ports[0].active = 0; return; } memset(urb->transfer_buffer + count, 0xFD, ep->max_transfer - count); urb->transfer_buffer_length = ep->max_transfer; } static const struct usb_protocol_ops snd_usbmidi_122l_ops = { .input = snd_usbmidi_us122l_input, .output = snd_usbmidi_us122l_output, }; /* * Emagic USB MIDI protocol: raw MIDI with "F5 xx" port switching. 
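 * For example (illustrative): the byte stream F5 01 90 3C 7F selects
 * port 0 ("F5 xx" with xx = port number + 1) and then carries a Note On on
 * that port; a 0xFF byte marks the end of the valid data in a packet.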
*/ static void snd_usbmidi_emagic_init_out(struct snd_usb_midi_out_endpoint *ep) { static const u8 init_data[] = { /* initialization magic: "get version" */ 0xf0, 0x00, 0x20, 0x31, /* Emagic */ 0x64, /* Unitor8 */ 0x0b, /* version number request */ 0x00, /* command version */ 0x00, /* EEPROM, box 0 */ 0xf7 }; send_bulk_static_data(ep, init_data, sizeof(init_data)); /* while we're at it, pour on more magic */ send_bulk_static_data(ep, init_data, sizeof(init_data)); } static void snd_usbmidi_emagic_finish_out(struct snd_usb_midi_out_endpoint *ep) { static const u8 finish_data[] = { /* switch to patch mode with last preset */ 0xf0, 0x00, 0x20, 0x31, /* Emagic */ 0x64, /* Unitor8 */ 0x10, /* patch switch command */ 0x00, /* command version */ 0x7f, /* to all boxes */ 0x40, /* last preset in EEPROM */ 0xf7 }; send_bulk_static_data(ep, finish_data, sizeof(finish_data)); } static void snd_usbmidi_emagic_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; /* FF indicates end of valid data */ for (i = 0; i < buffer_length; ++i) if (buffer[i] == 0xff) { buffer_length = i; break; } /* handle F5 at end of last buffer */ if (ep->seen_f5) goto switch_port; while (buffer_length > 0) { /* determine size of data until next F5 */ for (i = 0; i < buffer_length; ++i) if (buffer[i] == 0xf5) break; snd_usbmidi_input_data(ep, ep->current_port, buffer, i); buffer += i; buffer_length -= i; if (buffer_length <= 0) break; /* assert(buffer[0] == 0xf5); */ ep->seen_f5 = 1; ++buffer; --buffer_length; switch_port: if (buffer_length <= 0) break; if (buffer[0] < 0x80) { ep->current_port = (buffer[0] - 1) & 15; ++buffer; --buffer_length; } ep->seen_f5 = 0; } } static void snd_usbmidi_emagic_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int port0 = ep->current_port; uint8_t *buf = urb->transfer_buffer; int buf_free = ep->max_transfer; int length, i; for (i = 0; i < 0x10; ++i) { /* round-robin, starting at the last current port */ int portnum = (port0 + i) & 15; struct usbmidi_out_port *port = &ep->ports[portnum]; if (!port->active) continue; if (snd_rawmidi_transmit_peek(port->substream, buf, 1) != 1) { port->active = 0; continue; } if (portnum != ep->current_port) { if (buf_free < 2) break; ep->current_port = portnum; buf[0] = 0xf5; buf[1] = (portnum + 1) & 15; buf += 2; buf_free -= 2; } if (buf_free < 1) break; length = snd_rawmidi_transmit(port->substream, buf, buf_free); if (length > 0) { buf += length; buf_free -= length; if (buf_free < 1) break; } } if (buf_free < ep->max_transfer && buf_free > 0) { *buf = 0xff; --buf_free; } urb->transfer_buffer_length = ep->max_transfer - buf_free; } static const struct usb_protocol_ops snd_usbmidi_emagic_ops = { .input = snd_usbmidi_emagic_input, .output = snd_usbmidi_emagic_output, .init_out_endpoint = snd_usbmidi_emagic_init_out, .finish_out_endpoint = snd_usbmidi_emagic_finish_out, }; static void update_roland_altsetting(struct snd_usb_midi *umidi) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; int is_light_load; intf = umidi->iface; is_light_load = intf->cur_altsetting != intf->altsetting; if (umidi->roland_load_ctl->private_value == is_light_load) return; hostif = &intf->altsetting[umidi->roland_load_ctl->private_value]; intfd = get_iface_desc(hostif); snd_usbmidi_input_stop(&umidi->list); usb_set_interface(umidi->dev, intfd->bInterfaceNumber, intfd->bAlternateSetting); snd_usbmidi_input_start(&umidi->list); } static int substream_open(struct snd_rawmidi_substream 
*substream, int dir, int open) { struct snd_usb_midi *umidi = substream->rmidi->private_data; struct snd_kcontrol *ctl; down_read(&umidi->disc_rwsem); if (umidi->disconnected) { up_read(&umidi->disc_rwsem); return open ? -ENODEV : 0; } mutex_lock(&umidi->mutex); if (open) { if (!umidi->opened[0] && !umidi->opened[1]) { if (umidi->roland_load_ctl) { ctl = umidi->roland_load_ctl; ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; snd_ctl_notify(umidi->card, SNDRV_CTL_EVENT_MASK_INFO, &ctl->id); update_roland_altsetting(umidi); } } umidi->opened[dir]++; if (umidi->opened[1]) snd_usbmidi_input_start(&umidi->list); } else { umidi->opened[dir]--; if (!umidi->opened[1]) snd_usbmidi_input_stop(&umidi->list); if (!umidi->opened[0] && !umidi->opened[1]) { if (umidi->roland_load_ctl) { ctl = umidi->roland_load_ctl; ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; snd_ctl_notify(umidi->card, SNDRV_CTL_EVENT_MASK_INFO, &ctl->id); } } } mutex_unlock(&umidi->mutex); up_read(&umidi->disc_rwsem); return 0; } static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) { struct snd_usb_midi *umidi = substream->rmidi->private_data; struct usbmidi_out_port *port = NULL; int i, j; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) if (umidi->endpoints[i].out) for (j = 0; j < 0x10; ++j) if (umidi->endpoints[i].out->ports[j].substream == substream) { port = &umidi->endpoints[i].out->ports[j]; break; } if (!port) return -ENXIO; substream->runtime->private_data = port; port->state = STATE_UNKNOWN; return substream_open(substream, 0, 1); } static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { struct usbmidi_out_port *port = substream->runtime->private_data; cancel_work_sync(&port->ep->work); return substream_open(substream, 0, 0); } static void snd_usbmidi_output_trigger(struct snd_rawmidi_substream *substream, int up) { struct usbmidi_out_port *port = (struct usbmidi_out_port *)substream->runtime->private_data; port->active = up; if (up) { if (port->ep->umidi->disconnected) { /* gobble up remaining bytes to prevent wait in * snd_rawmidi_drain_output */ snd_rawmidi_proceed(substream); return; } queue_work(system_highpri_wq, &port->ep->work); } } static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream) { struct usbmidi_out_port *port = substream->runtime->private_data; struct snd_usb_midi_out_endpoint *ep = port->ep; unsigned int drain_urbs; DEFINE_WAIT(wait); long timeout = msecs_to_jiffies(50); if (ep->umidi->disconnected) return; /* * The substream buffer is empty, but some data might still be in the * currently active URBs, so we have to wait for those to complete. 
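 * Each bit in drain_urbs corresponds to one of the OUTPUT_URBS output URBs
 * still in flight; snd_usbmidi_out_urb_complete() clears the bit and wakes
 * drain_wait, so the loop below sleeps until all marked URBs have completed
 * or the timeout expires.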
*/ spin_lock_irq(&ep->buffer_lock); drain_urbs = ep->active_urbs; if (drain_urbs) { ep->drain_urbs |= drain_urbs; do { prepare_to_wait(&ep->drain_wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&ep->buffer_lock); timeout = schedule_timeout(timeout); spin_lock_irq(&ep->buffer_lock); drain_urbs &= ep->drain_urbs; } while (drain_urbs && timeout); finish_wait(&ep->drain_wait, &wait); } port->active = 0; spin_unlock_irq(&ep->buffer_lock); } static int snd_usbmidi_input_open(struct snd_rawmidi_substream *substream) { return substream_open(substream, 1, 1); } static int snd_usbmidi_input_close(struct snd_rawmidi_substream *substream) { return substream_open(substream, 1, 0); } static void snd_usbmidi_input_trigger(struct snd_rawmidi_substream *substream, int up) { struct snd_usb_midi *umidi = substream->rmidi->private_data; if (up) set_bit(substream->number, &umidi->input_triggered); else clear_bit(substream->number, &umidi->input_triggered); } static const struct snd_rawmidi_ops snd_usbmidi_output_ops = { .open = snd_usbmidi_output_open, .close = snd_usbmidi_output_close, .trigger = snd_usbmidi_output_trigger, .drain = snd_usbmidi_output_drain, }; static const struct snd_rawmidi_ops snd_usbmidi_input_ops = { .open = snd_usbmidi_input_open, .close = snd_usbmidi_input_close, .trigger = snd_usbmidi_input_trigger }; static void free_urb_and_buffer(struct snd_usb_midi *umidi, struct urb *urb, unsigned int buffer_length) { usb_free_coherent(umidi->dev, buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); } /* * Frees an input endpoint. * May be called when ep hasn't been initialized completely. */ static void snd_usbmidi_in_endpoint_delete(struct snd_usb_midi_in_endpoint *ep) { unsigned int i; for (i = 0; i < INPUT_URBS; ++i) if (ep->urbs[i]) free_urb_and_buffer(ep->umidi, ep->urbs[i], ep->urbs[i]->transfer_buffer_length); kfree(ep); } /* * Creates an input endpoint. */ static int snd_usbmidi_in_endpoint_create(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *ep_info, struct snd_usb_midi_endpoint *rep) { struct snd_usb_midi_in_endpoint *ep; void *buffer; unsigned int pipe; int length; unsigned int i; int err; rep->in = NULL; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; ep->umidi = umidi; for (i = 0; i < INPUT_URBS; ++i) { ep->urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!ep->urbs[i]) { err = -ENOMEM; goto error; } } if (ep_info->in_interval) pipe = usb_rcvintpipe(umidi->dev, ep_info->in_ep); else pipe = usb_rcvbulkpipe(umidi->dev, ep_info->in_ep); length = usb_maxpacket(umidi->dev, pipe); for (i = 0; i < INPUT_URBS; ++i) { buffer = usb_alloc_coherent(umidi->dev, length, GFP_KERNEL, &ep->urbs[i]->transfer_dma); if (!buffer) { err = -ENOMEM; goto error; } if (ep_info->in_interval) usb_fill_int_urb(ep->urbs[i], umidi->dev, pipe, buffer, length, snd_usbmidi_in_urb_complete, ep, ep_info->in_interval); else usb_fill_bulk_urb(ep->urbs[i], umidi->dev, pipe, buffer, length, snd_usbmidi_in_urb_complete, ep); ep->urbs[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; err = usb_urb_ep_type_check(ep->urbs[i]); if (err < 0) { dev_err(&umidi->dev->dev, "invalid MIDI in EP %x\n", ep_info->in_ep); goto error; } } rep->in = ep; return 0; error: snd_usbmidi_in_endpoint_delete(ep); return err; } /* * Frees an output endpoint. * May be called when ep hasn't been initialized completely. 
*/ static void snd_usbmidi_out_endpoint_clear(struct snd_usb_midi_out_endpoint *ep) { unsigned int i; for (i = 0; i < OUTPUT_URBS; ++i) if (ep->urbs[i].urb) { free_urb_and_buffer(ep->umidi, ep->urbs[i].urb, ep->max_transfer); ep->urbs[i].urb = NULL; } } static void snd_usbmidi_out_endpoint_delete(struct snd_usb_midi_out_endpoint *ep) { snd_usbmidi_out_endpoint_clear(ep); kfree(ep); } /* * Creates an output endpoint, and initializes output ports. */ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *ep_info, struct snd_usb_midi_endpoint *rep) { struct snd_usb_midi_out_endpoint *ep; unsigned int i; unsigned int pipe; void *buffer; int err; rep->out = NULL; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; ep->umidi = umidi; for (i = 0; i < OUTPUT_URBS; ++i) { ep->urbs[i].urb = usb_alloc_urb(0, GFP_KERNEL); if (!ep->urbs[i].urb) { err = -ENOMEM; goto error; } ep->urbs[i].ep = ep; } if (ep_info->out_interval) pipe = usb_sndintpipe(umidi->dev, ep_info->out_ep); else pipe = usb_sndbulkpipe(umidi->dev, ep_info->out_ep); switch (umidi->usb_id) { default: ep->max_transfer = usb_maxpacket(umidi->dev, pipe); break; /* * Various chips declare a packet size larger than 4 bytes, but * do not actually work with larger packets: */ case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */ case USB_ID(0x0a92, 0x1020): /* ESI M4U */ case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */ case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */ case USB_ID(0x15ca, 0x1806): /* Textech USB Midi Cable */ case USB_ID(0x1a86, 0x752d): /* QinHeng CH345 "USB2.0-MIDI" */ case USB_ID(0xfc08, 0x0101): /* Unknown vendor Cable */ ep->max_transfer = 4; break; /* * Some devices only work with 9 bytes packet size: */ case USB_ID(0x0644, 0x800e): /* Tascam US-122L */ case USB_ID(0x0644, 0x800f): /* Tascam US-144 */ ep->max_transfer = 9; break; } for (i = 0; i < OUTPUT_URBS; ++i) { buffer = usb_alloc_coherent(umidi->dev, ep->max_transfer, GFP_KERNEL, &ep->urbs[i].urb->transfer_dma); if (!buffer) { err = -ENOMEM; goto error; } if (ep_info->out_interval) usb_fill_int_urb(ep->urbs[i].urb, umidi->dev, pipe, buffer, ep->max_transfer, snd_usbmidi_out_urb_complete, &ep->urbs[i], ep_info->out_interval); else usb_fill_bulk_urb(ep->urbs[i].urb, umidi->dev, pipe, buffer, ep->max_transfer, snd_usbmidi_out_urb_complete, &ep->urbs[i]); err = usb_urb_ep_type_check(ep->urbs[i].urb); if (err < 0) { dev_err(&umidi->dev->dev, "invalid MIDI out EP %x\n", ep_info->out_ep); goto error; } ep->urbs[i].urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; } spin_lock_init(&ep->buffer_lock); INIT_WORK(&ep->work, snd_usbmidi_out_work); init_waitqueue_head(&ep->drain_wait); for (i = 0; i < 0x10; ++i) if (ep_info->out_cables & (1 << i)) { ep->ports[i].ep = ep; ep->ports[i].cable = i << 4; } if (umidi->usb_protocol_ops->init_out_endpoint) umidi->usb_protocol_ops->init_out_endpoint(ep); rep->out = ep; return 0; error: snd_usbmidi_out_endpoint_delete(ep); return err; } /* * Frees everything. */ static void snd_usbmidi_free(struct snd_usb_midi *umidi) { int i; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; if (ep->out) snd_usbmidi_out_endpoint_delete(ep->out); if (ep->in) snd_usbmidi_in_endpoint_delete(ep->in); } mutex_destroy(&umidi->mutex); kfree(umidi); } /* * Unlinks all URBs (must be done before the usb_device is deleted). 
*/ void snd_usbmidi_disconnect(struct list_head *p) { struct snd_usb_midi *umidi; unsigned int i, j; umidi = list_entry(p, struct snd_usb_midi, list); /* * an URB's completion handler may start the timer and * a timer may submit an URB. To reliably break the cycle * a flag under lock must be used */ down_write(&umidi->disc_rwsem); spin_lock_irq(&umidi->disc_lock); umidi->disconnected = 1; spin_unlock_irq(&umidi->disc_lock); up_write(&umidi->disc_rwsem); del_timer_sync(&umidi->error_timer); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; if (ep->out) cancel_work_sync(&ep->out->work); if (ep->out) { for (j = 0; j < OUTPUT_URBS; ++j) usb_kill_urb(ep->out->urbs[j].urb); if (umidi->usb_protocol_ops->finish_out_endpoint) umidi->usb_protocol_ops->finish_out_endpoint(ep->out); ep->out->active_urbs = 0; if (ep->out->drain_urbs) { ep->out->drain_urbs = 0; wake_up(&ep->out->drain_wait); } } if (ep->in) for (j = 0; j < INPUT_URBS; ++j) usb_kill_urb(ep->in->urbs[j]); /* free endpoints here; later call can result in Oops */ if (ep->out) snd_usbmidi_out_endpoint_clear(ep->out); if (ep->in) { snd_usbmidi_in_endpoint_delete(ep->in); ep->in = NULL; } } } EXPORT_SYMBOL(snd_usbmidi_disconnect); static void snd_usbmidi_rawmidi_free(struct snd_rawmidi *rmidi) { struct snd_usb_midi *umidi = rmidi->private_data; snd_usbmidi_free(umidi); } static struct snd_rawmidi_substream *snd_usbmidi_find_substream(struct snd_usb_midi *umidi, int stream, int number) { struct snd_rawmidi_substream *substream; list_for_each_entry(substream, &umidi->rmidi->streams[stream].substreams, list) { if (substream->number == number) return substream; } return NULL; } /* * This list specifies names for ports that do not fit into the standard * "(product) MIDI (n)" schema because they aren't external MIDI ports, * such as internal control or synthesizer ports. 
*/ static struct port_info { u32 id; short int port; short int voices; const char *name; unsigned int seq_flags; } snd_usbmidi_port_info[] = { #define PORT_INFO(vendor, product, num, name_, voices_, flags) \ { .id = USB_ID(vendor, product), \ .port = num, .voices = voices_, \ .name = name_, .seq_flags = flags } #define EXTERNAL_PORT(vendor, product, num, name) \ PORT_INFO(vendor, product, num, name, 0, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_PORT) #define CONTROL_PORT(vendor, product, num, name) \ PORT_INFO(vendor, product, num, name, 0, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_HARDWARE) #define GM_SYNTH_PORT(vendor, product, num, name, voices) \ PORT_INFO(vendor, product, num, name, voices, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_SYNTHESIZER) #define ROLAND_SYNTH_PORT(vendor, product, num, name, voices) \ PORT_INFO(vendor, product, num, name, voices, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM2 | \ SNDRV_SEQ_PORT_TYPE_MIDI_GS | \ SNDRV_SEQ_PORT_TYPE_MIDI_XG | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_SYNTHESIZER) #define SOUNDCANVAS_PORT(vendor, product, num, name, voices) \ PORT_INFO(vendor, product, num, name, voices, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM2 | \ SNDRV_SEQ_PORT_TYPE_MIDI_GS | \ SNDRV_SEQ_PORT_TYPE_MIDI_XG | \ SNDRV_SEQ_PORT_TYPE_MIDI_MT32 | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_SYNTHESIZER) /* Yamaha MOTIF XF */ GM_SYNTH_PORT(0x0499, 0x105c, 0, "%s Tone Generator", 128), CONTROL_PORT(0x0499, 0x105c, 1, "%s Remote Control"), EXTERNAL_PORT(0x0499, 0x105c, 2, "%s Thru"), CONTROL_PORT(0x0499, 0x105c, 3, "%s Editor"), /* Roland UA-100 */ CONTROL_PORT(0x0582, 0x0000, 2, "%s Control"), /* Roland SC-8850 */ SOUNDCANVAS_PORT(0x0582, 0x0003, 0, "%s Part A", 128), SOUNDCANVAS_PORT(0x0582, 0x0003, 1, "%s Part B", 128), SOUNDCANVAS_PORT(0x0582, 0x0003, 2, "%s Part C", 128), SOUNDCANVAS_PORT(0x0582, 0x0003, 3, "%s Part D", 128), EXTERNAL_PORT(0x0582, 0x0003, 4, "%s MIDI 1"), EXTERNAL_PORT(0x0582, 0x0003, 5, "%s MIDI 2"), /* Roland U-8 */ EXTERNAL_PORT(0x0582, 0x0004, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0004, 1, "%s Control"), /* Roland SC-8820 */ SOUNDCANVAS_PORT(0x0582, 0x0007, 0, "%s Part A", 64), SOUNDCANVAS_PORT(0x0582, 0x0007, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x0007, 2, "%s MIDI"), /* Roland SK-500 */ SOUNDCANVAS_PORT(0x0582, 0x000b, 0, "%s Part A", 64), SOUNDCANVAS_PORT(0x0582, 0x000b, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x000b, 2, "%s MIDI"), /* Roland SC-D70 */ SOUNDCANVAS_PORT(0x0582, 0x000c, 0, "%s Part A", 64), SOUNDCANVAS_PORT(0x0582, 0x000c, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x000c, 2, "%s MIDI"), /* Edirol UM-880 */ CONTROL_PORT(0x0582, 0x0014, 8, "%s Control"), /* Edirol SD-90 */ ROLAND_SYNTH_PORT(0x0582, 0x0016, 0, "%s Part A", 128), ROLAND_SYNTH_PORT(0x0582, 0x0016, 1, "%s Part B", 128), EXTERNAL_PORT(0x0582, 0x0016, 2, "%s MIDI 1"), EXTERNAL_PORT(0x0582, 0x0016, 3, "%s MIDI 2"), /* Edirol UM-550 */ CONTROL_PORT(0x0582, 0x0023, 5, "%s Control"), /* Edirol SD-20 */ ROLAND_SYNTH_PORT(0x0582, 0x0027, 0, "%s Part A", 64), ROLAND_SYNTH_PORT(0x0582, 0x0027, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x0027, 2, "%s MIDI"), /* Edirol SD-80 */ ROLAND_SYNTH_PORT(0x0582, 0x0029, 0, "%s Part A", 128), ROLAND_SYNTH_PORT(0x0582, 0x0029, 1, "%s Part B", 128), 
EXTERNAL_PORT(0x0582, 0x0029, 2, "%s MIDI 1"), EXTERNAL_PORT(0x0582, 0x0029, 3, "%s MIDI 2"), /* Edirol UA-700 */ EXTERNAL_PORT(0x0582, 0x002b, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x002b, 1, "%s Control"), /* Roland VariOS */ EXTERNAL_PORT(0x0582, 0x002f, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x002f, 1, "%s External MIDI"), EXTERNAL_PORT(0x0582, 0x002f, 2, "%s Sync"), /* Edirol PCR */ EXTERNAL_PORT(0x0582, 0x0033, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x0033, 1, "%s 1"), EXTERNAL_PORT(0x0582, 0x0033, 2, "%s 2"), /* BOSS GS-10 */ EXTERNAL_PORT(0x0582, 0x003b, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x003b, 1, "%s Control"), /* Edirol UA-1000 */ EXTERNAL_PORT(0x0582, 0x0044, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0044, 1, "%s Control"), /* Edirol UR-80 */ EXTERNAL_PORT(0x0582, 0x0048, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x0048, 1, "%s 1"), EXTERNAL_PORT(0x0582, 0x0048, 2, "%s 2"), /* Edirol PCR-A */ EXTERNAL_PORT(0x0582, 0x004d, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x004d, 1, "%s 1"), EXTERNAL_PORT(0x0582, 0x004d, 2, "%s 2"), /* BOSS GT-PRO */ CONTROL_PORT(0x0582, 0x0089, 0, "%s Control"), /* Edirol UM-3EX */ CONTROL_PORT(0x0582, 0x009a, 3, "%s Control"), /* Roland VG-99 */ CONTROL_PORT(0x0582, 0x00b2, 0, "%s Control"), EXTERNAL_PORT(0x0582, 0x00b2, 1, "%s MIDI"), /* Cakewalk Sonar V-Studio 100 */ EXTERNAL_PORT(0x0582, 0x00eb, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x00eb, 1, "%s Control"), /* Roland VB-99 */ CONTROL_PORT(0x0582, 0x0102, 0, "%s Control"), EXTERNAL_PORT(0x0582, 0x0102, 1, "%s MIDI"), /* Roland A-PRO */ EXTERNAL_PORT(0x0582, 0x010f, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x010f, 1, "%s 1"), CONTROL_PORT(0x0582, 0x010f, 2, "%s 2"), /* Roland SD-50 */ ROLAND_SYNTH_PORT(0x0582, 0x0114, 0, "%s Synth", 128), EXTERNAL_PORT(0x0582, 0x0114, 1, "%s MIDI"), CONTROL_PORT(0x0582, 0x0114, 2, "%s Control"), /* Roland OCTA-CAPTURE */ EXTERNAL_PORT(0x0582, 0x0120, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0120, 1, "%s Control"), EXTERNAL_PORT(0x0582, 0x0121, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0121, 1, "%s Control"), /* Roland SPD-SX */ CONTROL_PORT(0x0582, 0x0145, 0, "%s Control"), EXTERNAL_PORT(0x0582, 0x0145, 1, "%s MIDI"), /* Roland A-Series */ CONTROL_PORT(0x0582, 0x0156, 0, "%s Keyboard"), EXTERNAL_PORT(0x0582, 0x0156, 1, "%s MIDI"), /* Roland INTEGRA-7 */ ROLAND_SYNTH_PORT(0x0582, 0x015b, 0, "%s Synth", 128), CONTROL_PORT(0x0582, 0x015b, 1, "%s Control"), /* M-Audio MidiSport 8x8 */ CONTROL_PORT(0x0763, 0x1031, 8, "%s Control"), CONTROL_PORT(0x0763, 0x1033, 8, "%s Control"), /* MOTU Fastlane */ EXTERNAL_PORT(0x07fd, 0x0001, 0, "%s MIDI A"), EXTERNAL_PORT(0x07fd, 0x0001, 1, "%s MIDI B"), /* Emagic Unitor8/AMT8/MT4 */ EXTERNAL_PORT(0x086a, 0x0001, 8, "%s Broadcast"), EXTERNAL_PORT(0x086a, 0x0002, 8, "%s Broadcast"), EXTERNAL_PORT(0x086a, 0x0003, 4, "%s Broadcast"), /* Akai MPD16 */ CONTROL_PORT(0x09e8, 0x0062, 0, "%s Control"), PORT_INFO(0x09e8, 0x0062, 1, "%s MIDI", 0, SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_HARDWARE), /* Access Music Virus TI */ EXTERNAL_PORT(0x133e, 0x0815, 0, "%s MIDI"), PORT_INFO(0x133e, 0x0815, 1, "%s Synth", 0, SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_HARDWARE | SNDRV_SEQ_PORT_TYPE_SYNTHESIZER), }; static struct port_info *find_port_info(struct snd_usb_midi *umidi, int number) { int i; for (i = 0; i < ARRAY_SIZE(snd_usbmidi_port_info); ++i) { if (snd_usbmidi_port_info[i].id == umidi->usb_id && snd_usbmidi_port_info[i].port == number) return &snd_usbmidi_port_info[i]; } return NULL; } static void snd_usbmidi_get_port_info(struct snd_rawmidi *rmidi, 
int number, struct snd_seq_port_info *seq_port_info) { struct snd_usb_midi *umidi = rmidi->private_data; struct port_info *port_info; /* TODO: read port flags from descriptors */ port_info = find_port_info(umidi, number); if (port_info) { seq_port_info->type = port_info->seq_flags; seq_port_info->midi_voices = port_info->voices; } } /* return iJack for the corresponding jackID */ static int find_usb_ijack(struct usb_host_interface *hostif, uint8_t jack_id) { unsigned char *extra = hostif->extra; int extralen = hostif->extralen; struct usb_descriptor_header *h; struct usb_midi_out_jack_descriptor *outjd; struct usb_midi_in_jack_descriptor *injd; size_t sz; while (extralen > 4) { h = (struct usb_descriptor_header *)extra; if (h->bDescriptorType != USB_DT_CS_INTERFACE) goto next; outjd = (struct usb_midi_out_jack_descriptor *)h; if (h->bLength >= sizeof(*outjd) && outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && outjd->bJackID == jack_id) { sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); if (outjd->bLength < sz) goto next; return *(extra + sz - 1); } injd = (struct usb_midi_in_jack_descriptor *)h; if (injd->bLength >= sizeof(*injd) && injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && injd->bJackID == jack_id) return injd->iJack; next: if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } return 0; } static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, int stream, int number, int jack_id, struct snd_rawmidi_substream **rsubstream) { struct port_info *port_info; const char *name_format; struct usb_interface *intf; struct usb_host_interface *hostif; uint8_t jack_name_buf[32]; uint8_t *default_jack_name = "MIDI"; uint8_t *jack_name = default_jack_name; uint8_t iJack; int res; struct snd_rawmidi_substream *substream = snd_usbmidi_find_substream(umidi, stream, number); if (!substream) { dev_err(&umidi->dev->dev, "substream %d:%d not found\n", stream, number); return; } intf = umidi->iface; if (intf && jack_id >= 0) { hostif = intf->cur_altsetting; iJack = find_usb_ijack(hostif, jack_id); if (iJack != 0) { res = usb_string(umidi->dev, iJack, jack_name_buf, ARRAY_SIZE(jack_name_buf)); if (res) jack_name = jack_name_buf; } } port_info = find_port_info(umidi, number); name_format = port_info ? port_info->name : (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); snprintf(substream->name, sizeof(substream->name), name_format, umidi->card->shortname, jack_name, number + 1); *rsubstream = substream; } /* * Creates the endpoints and their ports. 
*/ static int snd_usbmidi_create_endpoints(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoints) { int i, j, err; int out_ports = 0, in_ports = 0; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { if (endpoints[i].out_cables) { err = snd_usbmidi_out_endpoint_create(umidi, &endpoints[i], &umidi->endpoints[i]); if (err < 0) return err; } if (endpoints[i].in_cables) { err = snd_usbmidi_in_endpoint_create(umidi, &endpoints[i], &umidi->endpoints[i]); if (err < 0) return err; } for (j = 0; j < 0x10; ++j) { if (endpoints[i].out_cables & (1 << j)) { snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_OUTPUT, out_ports, endpoints[i].assoc_out_jacks[j], &umidi->endpoints[i].out->ports[j].substream); ++out_ports; } if (endpoints[i].in_cables & (1 << j)) { snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_INPUT, in_ports, endpoints[i].assoc_in_jacks[j], &umidi->endpoints[i].in->ports[j].substream); ++in_ports; } } } dev_dbg(&umidi->dev->dev, "created %d output and %d input ports\n", out_ports, in_ports); return 0; } static struct usb_ms_endpoint_descriptor *find_usb_ms_endpoint_descriptor( struct usb_host_endpoint *hostep) { unsigned char *extra = hostep->extra; int extralen = hostep->extralen; while (extralen > 3) { struct usb_ms_endpoint_descriptor *ms_ep = (struct usb_ms_endpoint_descriptor *)extra; if (ms_ep->bLength > 3 && ms_ep->bDescriptorType == USB_DT_CS_ENDPOINT && ms_ep->bDescriptorSubtype == UAC_MS_GENERAL) return ms_ep; if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } return NULL; } /* * Returns MIDIStreaming device capabilities. */ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoints) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; struct usb_ms_header_descriptor *ms_header; struct usb_host_endpoint *hostep; struct usb_endpoint_descriptor *ep; struct usb_ms_endpoint_descriptor *ms_ep; int i, j, epidx; intf = umidi->iface; if (!intf) return -ENXIO; hostif = &intf->altsetting[0]; intfd = get_iface_desc(hostif); ms_header = (struct usb_ms_header_descriptor *)hostif->extra; if (hostif->extralen >= 7 && ms_header->bLength >= 7 && ms_header->bDescriptorType == USB_DT_CS_INTERFACE && ms_header->bDescriptorSubtype == UAC_HEADER) dev_dbg(&umidi->dev->dev, "MIDIStreaming version %02x.%02x\n", ((uint8_t *)&ms_header->bcdMSC)[1], ((uint8_t *)&ms_header->bcdMSC)[0]); else dev_warn(&umidi->dev->dev, "MIDIStreaming interface descriptor not found\n"); epidx = 0; for (i = 0; i < intfd->bNumEndpoints; ++i) { hostep = &hostif->endpoint[i]; ep = get_ep_desc(hostep); if (!usb_endpoint_xfer_bulk(ep) && !usb_endpoint_xfer_int(ep)) continue; ms_ep = find_usb_ms_endpoint_descriptor(hostep); if (!ms_ep) continue; if (ms_ep->bLength <= sizeof(*ms_ep)) continue; if (ms_ep->bNumEmbMIDIJack > 0x10) continue; if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumEmbMIDIJack) continue; if (usb_endpoint_dir_out(ep)) { if (endpoints[epidx].out_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { dev_warn(&umidi->dev->dev, "too many endpoints\n"); break; } } endpoints[epidx].out_ep = usb_endpoint_num(ep); if (usb_endpoint_xfer_int(ep)) endpoints[epidx].out_interval = ep->bInterval; else if (snd_usb_get_speed(umidi->dev) == USB_SPEED_LOW) /* * Low speed bulk transfers don't exist, so * force interrupt transfers for devices like * ESI MIDI Mate that try to use them anyway. 
*/ endpoints[epidx].out_interval = 1; endpoints[epidx].out_cables = (1 << ms_ep->bNumEmbMIDIJack) - 1; for (j = 0; j < ms_ep->bNumEmbMIDIJack; ++j) endpoints[epidx].assoc_out_jacks[j] = ms_ep->baAssocJackID[j]; for (; j < ARRAY_SIZE(endpoints[epidx].assoc_out_jacks); ++j) endpoints[epidx].assoc_out_jacks[j] = -1; dev_dbg(&umidi->dev->dev, "EP %02X: %d jack(s)\n", ep->bEndpointAddress, ms_ep->bNumEmbMIDIJack); } else { if (endpoints[epidx].in_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { dev_warn(&umidi->dev->dev, "too many endpoints\n"); break; } } endpoints[epidx].in_ep = usb_endpoint_num(ep); if (usb_endpoint_xfer_int(ep)) endpoints[epidx].in_interval = ep->bInterval; else if (snd_usb_get_speed(umidi->dev) == USB_SPEED_LOW) endpoints[epidx].in_interval = 1; endpoints[epidx].in_cables = (1 << ms_ep->bNumEmbMIDIJack) - 1; for (j = 0; j < ms_ep->bNumEmbMIDIJack; ++j) endpoints[epidx].assoc_in_jacks[j] = ms_ep->baAssocJackID[j]; for (; j < ARRAY_SIZE(endpoints[epidx].assoc_in_jacks); ++j) endpoints[epidx].assoc_in_jacks[j] = -1; dev_dbg(&umidi->dev->dev, "EP %02X: %d jack(s)\n", ep->bEndpointAddress, ms_ep->bNumEmbMIDIJack); } } return 0; } static int roland_load_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *info) { static const char *const names[] = { "High Load", "Light Load" }; return snd_ctl_enum_info(info, 1, 2, names); } static int roland_load_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { value->value.enumerated.item[0] = kcontrol->private_value; return 0; } static int roland_load_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { struct snd_usb_midi *umidi = kcontrol->private_data; int changed; if (value->value.enumerated.item[0] > 1) return -EINVAL; mutex_lock(&umidi->mutex); changed = value->value.enumerated.item[0] != kcontrol->private_value; if (changed) kcontrol->private_value = value->value.enumerated.item[0]; mutex_unlock(&umidi->mutex); return changed; } static const struct snd_kcontrol_new roland_load_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "MIDI Input Mode", .info = roland_load_info, .get = roland_load_get, .put = roland_load_put, .private_value = 1, }; /* * On Roland devices, use the second alternate setting to be able to use * the interrupt input endpoint. */ static void snd_usbmidi_switch_roland_altsetting(struct snd_usb_midi *umidi) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; intf = umidi->iface; if (!intf || intf->num_altsetting != 2) return; hostif = &intf->altsetting[1]; intfd = get_iface_desc(hostif); /* If either or both of the endpoints support interrupt transfer, * then use the alternate setting */ if (intfd->bNumEndpoints != 2 || !((get_endpoint(hostif, 0)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT || (get_endpoint(hostif, 1)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT)) return; dev_dbg(&umidi->dev->dev, "switching to altsetting %d with int ep\n", intfd->bAlternateSetting); usb_set_interface(umidi->dev, intfd->bInterfaceNumber, intfd->bAlternateSetting); umidi->roland_load_ctl = snd_ctl_new1(&roland_load_ctl, umidi); if (snd_ctl_add(umidi->card, umidi->roland_load_ctl) < 0) umidi->roland_load_ctl = NULL; } /* * Try to find any usable endpoints in the interface. 
*/ static int snd_usbmidi_detect_endpoints(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint, int max_endpoints) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; struct usb_endpoint_descriptor *epd; int i, out_eps = 0, in_eps = 0; if (USB_ID_VENDOR(umidi->usb_id) == 0x0582) snd_usbmidi_switch_roland_altsetting(umidi); if (endpoint[0].out_ep || endpoint[0].in_ep) return 0; intf = umidi->iface; if (!intf || intf->num_altsetting < 1) return -ENOENT; hostif = intf->cur_altsetting; intfd = get_iface_desc(hostif); for (i = 0; i < intfd->bNumEndpoints; ++i) { epd = get_endpoint(hostif, i); if (!usb_endpoint_xfer_bulk(epd) && !usb_endpoint_xfer_int(epd)) continue; if (out_eps < max_endpoints && usb_endpoint_dir_out(epd)) { endpoint[out_eps].out_ep = usb_endpoint_num(epd); if (usb_endpoint_xfer_int(epd)) endpoint[out_eps].out_interval = epd->bInterval; ++out_eps; } if (in_eps < max_endpoints && usb_endpoint_dir_in(epd)) { endpoint[in_eps].in_ep = usb_endpoint_num(epd); if (usb_endpoint_xfer_int(epd)) endpoint[in_eps].in_interval = epd->bInterval; ++in_eps; } } return (out_eps || in_eps) ? 0 : -ENOENT; } /* * Detects the endpoints for one-port-per-endpoint protocols. */ static int snd_usbmidi_detect_per_port_endpoints(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoints) { int err, i; err = snd_usbmidi_detect_endpoints(umidi, endpoints, MIDI_MAX_ENDPOINTS); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { if (endpoints[i].out_ep) endpoints[i].out_cables = 0x0001; if (endpoints[i].in_ep) endpoints[i].in_cables = 0x0001; } return err; } /* * Detects the endpoints and ports of Yamaha devices. */ static int snd_usbmidi_detect_yamaha(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; uint8_t *cs_desc; intf = umidi->iface; if (!intf) return -ENOENT; hostif = intf->altsetting; intfd = get_iface_desc(hostif); if (intfd->bNumEndpoints < 1) return -ENOENT; /* * For each port there is one MIDI_IN/OUT_JACK descriptor, not * necessarily with any useful contents. So simply count 'em. */ for (cs_desc = hostif->extra; cs_desc < hostif->extra + hostif->extralen && cs_desc[0] >= 2; cs_desc += cs_desc[0]) { if (cs_desc[1] == USB_DT_CS_INTERFACE) { if (cs_desc[2] == UAC_MIDI_IN_JACK) endpoint->in_cables = (endpoint->in_cables << 1) | 1; else if (cs_desc[2] == UAC_MIDI_OUT_JACK) endpoint->out_cables = (endpoint->out_cables << 1) | 1; } } if (!endpoint->in_cables && !endpoint->out_cables) return -ENOENT; return snd_usbmidi_detect_endpoints(umidi, endpoint, 1); } /* * Detects the endpoints and ports of Roland devices. */ static int snd_usbmidi_detect_roland(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint) { struct usb_interface *intf; struct usb_host_interface *hostif; u8 *cs_desc; intf = umidi->iface; if (!intf) return -ENOENT; hostif = intf->altsetting; /* * Some devices have a descriptor <06 24 F1 02 <inputs> <outputs>>, * some have standard class descriptors, or both kinds, or neither. 
*/ for (cs_desc = hostif->extra; cs_desc < hostif->extra + hostif->extralen && cs_desc[0] >= 2; cs_desc += cs_desc[0]) { if (cs_desc[0] >= 6 && cs_desc[1] == USB_DT_CS_INTERFACE && cs_desc[2] == 0xf1 && cs_desc[3] == 0x02) { if (cs_desc[4] > 0x10 || cs_desc[5] > 0x10) continue; endpoint->in_cables = (1 << cs_desc[4]) - 1; endpoint->out_cables = (1 << cs_desc[5]) - 1; return snd_usbmidi_detect_endpoints(umidi, endpoint, 1); } else if (cs_desc[0] >= 7 && cs_desc[1] == USB_DT_CS_INTERFACE && cs_desc[2] == UAC_HEADER) { return snd_usbmidi_get_ms_info(umidi, endpoint); } } return -ENODEV; } /* * Creates the endpoints and their ports for Midiman devices. */ static int snd_usbmidi_create_endpoints_midiman(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint) { struct snd_usb_midi_endpoint_info ep_info; struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; struct usb_endpoint_descriptor *epd; int cable, err; intf = umidi->iface; if (!intf) return -ENOENT; hostif = intf->altsetting; intfd = get_iface_desc(hostif); /* * The various MidiSport devices have more or less random endpoint * numbers, so we have to identify the endpoints by their index in * the descriptor array, like the driver for that other OS does. * * There is one interrupt input endpoint for all input ports, one * bulk output endpoint for even-numbered ports, and one for odd- * numbered ports. Both bulk output endpoints have corresponding * input bulk endpoints (at indices 1 and 3) which aren't used. */ if (intfd->bNumEndpoints < (endpoint->out_cables > 0x0001 ? 5 : 3)) { dev_dbg(&umidi->dev->dev, "not enough endpoints\n"); return -ENOENT; } epd = get_endpoint(hostif, 0); if (!usb_endpoint_dir_in(epd) || !usb_endpoint_xfer_int(epd)) { dev_dbg(&umidi->dev->dev, "endpoint[0] isn't interrupt\n"); return -ENXIO; } epd = get_endpoint(hostif, 2); if (!usb_endpoint_dir_out(epd) || !usb_endpoint_xfer_bulk(epd)) { dev_dbg(&umidi->dev->dev, "endpoint[2] isn't bulk output\n"); return -ENXIO; } if (endpoint->out_cables > 0x0001) { epd = get_endpoint(hostif, 4); if (!usb_endpoint_dir_out(epd) || !usb_endpoint_xfer_bulk(epd)) { dev_dbg(&umidi->dev->dev, "endpoint[4] isn't bulk output\n"); return -ENXIO; } } ep_info.out_ep = get_endpoint(hostif, 2)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; ep_info.out_interval = 0; ep_info.out_cables = endpoint->out_cables & 0x5555; err = snd_usbmidi_out_endpoint_create(umidi, &ep_info, &umidi->endpoints[0]); if (err < 0) return err; ep_info.in_ep = get_endpoint(hostif, 0)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; ep_info.in_interval = get_endpoint(hostif, 0)->bInterval; ep_info.in_cables = endpoint->in_cables; err = snd_usbmidi_in_endpoint_create(umidi, &ep_info, &umidi->endpoints[0]); if (err < 0) return err; if (endpoint->out_cables > 0x0001) { ep_info.out_ep = get_endpoint(hostif, 4)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; ep_info.out_cables = endpoint->out_cables & 0xaaaa; err = snd_usbmidi_out_endpoint_create(umidi, &ep_info, &umidi->endpoints[1]); if (err < 0) return err; } for (cable = 0; cable < 0x10; ++cable) { if (endpoint->out_cables & (1 << cable)) snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_OUTPUT, cable, -1 /* prevent trying to find jack */, &umidi->endpoints[cable & 1].out->ports[cable].substream); if (endpoint->in_cables & (1 << cable)) snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_INPUT, cable, -1 /* prevent trying to find jack */, &umidi->endpoints[0].in->ports[cable].substream); } return 0; } 
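/*
 * Rawmidi registration: creates the ALSA rawmidi device for this interface
 * and binds the output/input substream ops defined above.  The per-port
 * substream names are filled in later by snd_usbmidi_init_substream() once
 * the endpoints and cables are known.
 */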
static const struct snd_rawmidi_global_ops snd_usbmidi_ops = { .get_port_info = snd_usbmidi_get_port_info, }; static int snd_usbmidi_create_rawmidi(struct snd_usb_midi *umidi, int out_ports, int in_ports) { struct snd_rawmidi *rmidi; int err; err = snd_rawmidi_new(umidi->card, "USB MIDI", umidi->next_midi_device++, out_ports, in_ports, &rmidi); if (err < 0) return err; strcpy(rmidi->name, umidi->card->shortname); rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT | SNDRV_RAWMIDI_INFO_INPUT | SNDRV_RAWMIDI_INFO_DUPLEX; rmidi->ops = &snd_usbmidi_ops; rmidi->private_data = umidi; rmidi->private_free = snd_usbmidi_rawmidi_free; snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &snd_usbmidi_output_ops); snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &snd_usbmidi_input_ops); umidi->rmidi = rmidi; return 0; } /* * Temporarily stop input. */ void snd_usbmidi_input_stop(struct list_head *p) { struct snd_usb_midi *umidi; unsigned int i, j; umidi = list_entry(p, struct snd_usb_midi, list); if (!umidi->input_running) return; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; if (ep->in) for (j = 0; j < INPUT_URBS; ++j) usb_kill_urb(ep->in->urbs[j]); } umidi->input_running = 0; } EXPORT_SYMBOL(snd_usbmidi_input_stop); static void snd_usbmidi_input_start_ep(struct snd_usb_midi *umidi, struct snd_usb_midi_in_endpoint *ep) { unsigned int i; unsigned long flags; if (!ep) return; for (i = 0; i < INPUT_URBS; ++i) { struct urb *urb = ep->urbs[i]; spin_lock_irqsave(&umidi->disc_lock, flags); if (!atomic_read(&urb->use_count)) { urb->dev = ep->umidi->dev; snd_usbmidi_submit_urb(urb, GFP_ATOMIC); } spin_unlock_irqrestore(&umidi->disc_lock, flags); } } /* * Resume input after a call to snd_usbmidi_input_stop(). */ void snd_usbmidi_input_start(struct list_head *p) { struct snd_usb_midi *umidi; int i; umidi = list_entry(p, struct snd_usb_midi, list); if (umidi->input_running || !umidi->opened[1]) return; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) snd_usbmidi_input_start_ep(umidi, umidi->endpoints[i].in); umidi->input_running = 1; } EXPORT_SYMBOL(snd_usbmidi_input_start); /* * Prepare for suspend. Typically called from the USB suspend callback. */ void snd_usbmidi_suspend(struct list_head *p) { struct snd_usb_midi *umidi; umidi = list_entry(p, struct snd_usb_midi, list); mutex_lock(&umidi->mutex); snd_usbmidi_input_stop(p); mutex_unlock(&umidi->mutex); } EXPORT_SYMBOL(snd_usbmidi_suspend); /* * Resume. Typically called from the USB resume callback. */ void snd_usbmidi_resume(struct list_head *p) { struct snd_usb_midi *umidi; umidi = list_entry(p, struct snd_usb_midi, list); mutex_lock(&umidi->mutex); snd_usbmidi_input_start(p); mutex_unlock(&umidi->mutex); } EXPORT_SYMBOL(snd_usbmidi_resume); /* * Creates and registers everything needed for a MIDI streaming interface. 
*/ int __snd_usbmidi_create(struct snd_card *card, struct usb_interface *iface, struct list_head *midi_list, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id, unsigned int *num_rawmidis) { struct snd_usb_midi *umidi; struct snd_usb_midi_endpoint_info endpoints[MIDI_MAX_ENDPOINTS]; int out_ports, in_ports; int i, err; umidi = kzalloc(sizeof(*umidi), GFP_KERNEL); if (!umidi) return -ENOMEM; umidi->dev = interface_to_usbdev(iface); umidi->card = card; umidi->iface = iface; umidi->quirk = quirk; umidi->usb_protocol_ops = &snd_usbmidi_standard_ops; if (num_rawmidis) umidi->next_midi_device = *num_rawmidis; spin_lock_init(&umidi->disc_lock); init_rwsem(&umidi->disc_rwsem); mutex_init(&umidi->mutex); if (!usb_id) usb_id = USB_ID(le16_to_cpu(umidi->dev->descriptor.idVendor), le16_to_cpu(umidi->dev->descriptor.idProduct)); umidi->usb_id = usb_id; timer_setup(&umidi->error_timer, snd_usbmidi_error_timer, 0); /* detect the endpoint(s) to use */ memset(endpoints, 0, sizeof(endpoints)); switch (quirk ? quirk->type : QUIRK_MIDI_STANDARD_INTERFACE) { case QUIRK_MIDI_STANDARD_INTERFACE: err = snd_usbmidi_get_ms_info(umidi, endpoints); if (umidi->usb_id == USB_ID(0x0763, 0x0150)) /* M-Audio Uno */ umidi->usb_protocol_ops = &snd_usbmidi_maudio_broken_running_status_ops; break; case QUIRK_MIDI_US122L: umidi->usb_protocol_ops = &snd_usbmidi_122l_ops; fallthrough; case QUIRK_MIDI_FIXED_ENDPOINT: memcpy(&endpoints[0], quirk->data, sizeof(struct snd_usb_midi_endpoint_info)); err = snd_usbmidi_detect_endpoints(umidi, &endpoints[0], 1); break; case QUIRK_MIDI_YAMAHA: err = snd_usbmidi_detect_yamaha(umidi, &endpoints[0]); break; case QUIRK_MIDI_ROLAND: err = snd_usbmidi_detect_roland(umidi, &endpoints[0]); break; case QUIRK_MIDI_MIDIMAN: umidi->usb_protocol_ops = &snd_usbmidi_midiman_ops; memcpy(&endpoints[0], quirk->data, sizeof(struct snd_usb_midi_endpoint_info)); err = 0; break; case QUIRK_MIDI_NOVATION: umidi->usb_protocol_ops = &snd_usbmidi_novation_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_RAW_BYTES: umidi->usb_protocol_ops = &snd_usbmidi_raw_ops; /* * Interface 1 contains isochronous endpoints, but with the same * numbers as in interface 0. Since it is interface 1 that the * USB core has most recently seen, these descriptors are now * associated with the endpoint numbers. This will foul up our * attempts to submit bulk/interrupt URBs to the endpoints in * interface 0, so we have to make sure that the USB core looks * again at interface 0 by calling usb_set_interface() on it. 
*/ if (umidi->usb_id == USB_ID(0x07fd, 0x0001)) /* MOTU Fastlane */ usb_set_interface(umidi->dev, 0, 0); err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_EMAGIC: umidi->usb_protocol_ops = &snd_usbmidi_emagic_ops; memcpy(&endpoints[0], quirk->data, sizeof(struct snd_usb_midi_endpoint_info)); err = snd_usbmidi_detect_endpoints(umidi, &endpoints[0], 1); break; case QUIRK_MIDI_CME: umidi->usb_protocol_ops = &snd_usbmidi_cme_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_AKAI: umidi->usb_protocol_ops = &snd_usbmidi_akai_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); /* endpoint 1 is input-only */ endpoints[1].out_cables = 0; break; case QUIRK_MIDI_FTDI: umidi->usb_protocol_ops = &snd_usbmidi_ftdi_ops; /* set baud rate to 31250 (48 MHz / 16 / 96) */ err = usb_control_msg(umidi->dev, usb_sndctrlpipe(umidi->dev, 0), 3, 0x40, 0x60, 0, NULL, 0, 1000); if (err < 0) break; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_CH345: umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: dev_err(&umidi->dev->dev, "invalid quirk type %d\n", quirk->type); err = -ENXIO; break; } if (err < 0) goto free_midi; /* create rawmidi device */ out_ports = 0; in_ports = 0; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { out_ports += hweight16(endpoints[i].out_cables); in_ports += hweight16(endpoints[i].in_cables); } err = snd_usbmidi_create_rawmidi(umidi, out_ports, in_ports); if (err < 0) goto free_midi; /* create endpoint/port structures */ if (quirk && quirk->type == QUIRK_MIDI_MIDIMAN) err = snd_usbmidi_create_endpoints_midiman(umidi, &endpoints[0]); else err = snd_usbmidi_create_endpoints(umidi, endpoints); if (err < 0) goto exit; usb_autopm_get_interface_no_resume(umidi->iface); list_add_tail(&umidi->list, midi_list); if (num_rawmidis) *num_rawmidis = umidi->next_midi_device; return 0; free_midi: kfree(umidi); exit: return err; } EXPORT_SYMBOL(__snd_usbmidi_create); |
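/*
 * Usage sketch (illustration only, not part of this file): a caller in the
 * USB-audio quirk handling would typically create the MIDI interface roughly
 * as below; "chip" and its members are hypothetical placeholder names.
 * Passing usb_id == 0 lets __snd_usbmidi_create() derive the ID from the USB
 * device descriptor, as seen at the top of the function above.
 *
 *	unsigned int num_rawmidis = 0;
 *	int err;
 *
 *	err = __snd_usbmidi_create(chip->card, iface, &chip->midi_list,
 *				   quirk, 0, &num_rawmidis);
 *	if (err < 0)
 *		return err;
 */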
// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Generic netlink support functions to configure an SMC-R PNET table * * Copyright IBM Corp.
2016 * * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> */ #include <linux/module.h> #include <linux/list.h> #include <linux/ctype.h> #include <linux/mutex.h> #include <net/netlink.h> #include <net/genetlink.h> #include <uapi/linux/if.h> #include <uapi/linux/smc.h> #include <rdma/ib_verbs.h> #include <net/netns/generic.h> #include "smc_netns.h" #include "smc_pnet.h" #include "smc_ib.h" #include "smc_ism.h" #include "smc_core.h" static struct net_device *__pnet_find_base_ndev(struct net_device *ndev); static struct net_device *pnet_find_base_ndev(struct net_device *ndev); static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, .len = SMC_MAX_PNETID_LEN }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [SMC_PNETID_IBNAME] = { .type = NLA_NUL_STRING, .len = IB_DEVICE_NAME_MAX - 1 }, [SMC_PNETID_IBPORT] = { .type = NLA_U8 } }; static struct genl_family smc_pnet_nl_family; enum smc_pnet_nametype { SMC_PNET_ETH = 1, SMC_PNET_IB = 2, }; /* pnet entry stored in pnet table */ struct smc_pnetentry { struct list_head list; char pnet_name[SMC_MAX_PNETID_LEN + 1]; enum smc_pnet_nametype type; union { struct { char eth_name[IFNAMSIZ + 1]; struct net_device *ndev; netdevice_tracker dev_tracker; }; struct { char ib_name[IB_DEVICE_NAME_MAX + 1]; u8 ib_port; }; }; }; /* Check if the pnetid is set */ bool smc_pnet_is_pnetid_set(u8 *pnetid) { if (pnetid[0] == 0 || pnetid[0] == _S) return false; return true; } /* Check if two given pnetids match */ static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) { int i; for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { if ((pnetid1[i] == 0 || pnetid1[i] == _S) && (pnetid2[i] == 0 || pnetid2[i] == _S)) break; if (pnetid1[i] != pnetid2[i]) return false; } return true; } /* Remove a pnetid from the pnet table. 
*/ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) { struct smc_pnetentry *pnetelem, *tmp_pe; struct smc_pnettable *pnettable; struct smc_ib_device *ibdev; struct smcd_dev *smcd; struct smc_net *sn; int rc = -ENOENT; int ibport; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; /* remove table entry */ mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { netdev_put(pnetelem->ndev, &pnetelem->dev_tracker); pr_warn_ratelimited("smc: net device %s " "erased user defined " "pnetid %.16s\n", pnetelem->eth_name, pnetelem->pnet_name); } kfree(pnetelem); rc = 0; } } mutex_unlock(&pnettable->lock); /* if this is not the initial namespace, stop here */ if (net != &init_net) return rc; /* remove ib devices */ mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { if (ibdev->pnetid_by_user[ibport] && (!pnet_name || smc_pnet_match(pnet_name, ibdev->pnetid[ibport]))) { pr_warn_ratelimited("smc: ib device %s ibport " "%d erased user defined " "pnetid %.16s\n", ibdev->ibdev->name, ibport + 1, ibdev->pnetid[ibport]); memset(ibdev->pnetid[ibport], 0, SMC_MAX_PNETID_LEN); ibdev->pnetid_by_user[ibport] = false; rc = 0; } } } mutex_unlock(&smc_ib_devices.mutex); /* remove smcd devices */ mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) { if (smcd->pnetid_by_user && (!pnet_name || smc_pnet_match(pnet_name, smcd->pnetid))) { pr_warn_ratelimited("smc: smcd device %s " "erased user defined pnetid " "%.16s\n", dev_name(smcd->ops->get_dev(smcd)), smcd->pnetid); memset(smcd->pnetid, 0, SMC_MAX_PNETID_LEN); smcd->pnetid_by_user = false; rc = 0; } } mutex_unlock(&smcd_dev_list.mutex); return rc; } /* Add the reference to a given network device to the pnet table. */ static int smc_pnet_add_by_ndev(struct net_device *ndev) { struct smc_pnetentry *pnetelem, *tmp_pe; struct smc_pnettable *pnettable; struct net *net = dev_net(ndev); struct smc_net *sn; int rc = -ENOENT; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { netdev_hold(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); pnetelem->ndev = ndev; rc = 0; pr_warn_ratelimited("smc: adding net device %s with " "user defined pnetid %.16s\n", pnetelem->eth_name, pnetelem->pnet_name); break; } } mutex_unlock(&pnettable->lock); return rc; } /* Remove the reference to a given network device from the pnet table. 
*/ static int smc_pnet_remove_by_ndev(struct net_device *ndev) { struct smc_pnetentry *pnetelem, *tmp_pe; struct smc_pnettable *pnettable; struct net *net = dev_net(ndev); struct smc_net *sn; int rc = -ENOENT; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { netdev_put(pnetelem->ndev, &pnetelem->dev_tracker); pnetelem->ndev = NULL; rc = 0; pr_warn_ratelimited("smc: removing net device %s with " "user defined pnetid %.16s\n", pnetelem->eth_name, pnetelem->pnet_name); break; } } mutex_unlock(&pnettable->lock); return rc; } /* Apply pnetid to ib device when no pnetid is set. */ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, char *pnet_name) { bool applied = false; mutex_lock(&smc_ib_devices.mutex); if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) { memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, SMC_MAX_PNETID_LEN); ib_dev->pnetid_by_user[ib_port - 1] = true; applied = true; } mutex_unlock(&smc_ib_devices.mutex); return applied; } /* Apply pnetid to smcd device when no pnetid is set. */ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) { bool applied = false; mutex_lock(&smcd_dev_list.mutex); if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) { memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = true; applied = true; } mutex_unlock(&smcd_dev_list.mutex); return applied; } /* The limit for pnetid is 16 characters. * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. * Lower case letters are converted to upper case. * Interior blanks should not be used. */ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) { char *bf = skip_spaces(pnet_name); size_t len = strlen(bf); char *end = bf + len; if (!len) return false; while (--end >= bf && isspace(*end)) ; if (end - bf >= SMC_MAX_PNETID_LEN) return false; while (bf <= end) { if (!isalnum(*bf)) return false; *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; bf++; } *pnetid = '\0'; return true; } /* Find an infiniband device by a given name. The device might not exist. */ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) { struct smc_ib_device *ibdev; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || (ibdev->ibdev->dev.parent && !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, IB_DEVICE_NAME_MAX - 1))) { goto out; } } ibdev = NULL; out: mutex_unlock(&smc_ib_devices.mutex); return ibdev; } /* Find an smcd device by a given name. The device might not exist. */ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) { struct smcd_dev *smcd_dev; mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (!strncmp(dev_name(smcd_dev->ops->get_dev(smcd_dev)), smcd_name, IB_DEVICE_NAME_MAX - 1)) goto out; } smcd_dev = NULL; out: mutex_unlock(&smcd_dev_list.mutex); return smcd_dev; } static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, char *eth_name, char *pnet_name) { struct smc_pnetentry *tmp_pe, *new_pe; struct net_device *ndev, *base_ndev; u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; bool new_netdev; int rc; /* check if (base) netdev already has a pnetid. 
If there is one, we do * not want to add a pnet table entry */ rc = -EEXIST; ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ if (ndev) { base_ndev = pnet_find_base_ndev(ndev); if (!smc_pnetid_by_dev_port(base_ndev->dev.parent, base_ndev->dev_port, ndev_pnetid)) goto out_put; } /* add a new netdev entry to the pnet table if there isn't one */ rc = -ENOMEM; new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); if (!new_pe) goto out_put; new_pe->type = SMC_PNET_ETH; memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); rc = -EEXIST; new_netdev = true; mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_ETH && !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { new_netdev = false; break; } } if (new_netdev) { if (ndev) { new_pe->ndev = ndev; netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_ATOMIC); } list_add_tail(&new_pe->list, &pnettable->pnetlist); mutex_unlock(&pnettable->lock); } else { mutex_unlock(&pnettable->lock); kfree(new_pe); goto out_put; } if (ndev) pr_warn_ratelimited("smc: net device %s " "applied user defined pnetid %.16s\n", new_pe->eth_name, new_pe->pnet_name); return 0; out_put: dev_put(ndev); return rc; } static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, u8 ib_port, char *pnet_name) { struct smc_pnetentry *tmp_pe, *new_pe; struct smc_ib_device *ib_dev; bool smcddev_applied = true; bool ibdev_applied = true; struct smcd_dev *smcd; struct device *dev; bool new_ibdev; /* try to apply the pnetid to active devices */ ib_dev = smc_pnet_find_ib(ib_name); if (ib_dev) { ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); if (ibdev_applied) pr_warn_ratelimited("smc: ib device %s ibport %d " "applied user defined pnetid " "%.16s\n", ib_dev->ibdev->name, ib_port, ib_dev->pnetid[ib_port - 1]); } smcd = smc_pnet_find_smcd(ib_name); if (smcd) { smcddev_applied = smc_pnet_apply_smcd(smcd, pnet_name); if (smcddev_applied) { dev = smcd->ops->get_dev(smcd); pr_warn_ratelimited("smc: smcd device %s " "applied user defined pnetid " "%.16s\n", dev_name(dev), smcd->pnetid); } } /* Apply fails when a device has a hardware-defined pnetid set, do not * add a pnet table entry in that case. */ if (!ibdev_applied || !smcddev_applied) return -EEXIST; /* add a new ib entry to the pnet table if there isn't one */ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); if (!new_pe) return -ENOMEM; new_pe->type = SMC_PNET_IB; memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); new_pe->ib_port = ib_port; new_ibdev = true; mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { new_ibdev = false; break; } } if (new_ibdev) { list_add_tail(&new_pe->list, &pnettable->pnetlist); mutex_unlock(&pnettable->lock); } else { mutex_unlock(&pnettable->lock); kfree(new_pe); } return (new_ibdev) ? 0 : -EEXIST; } /* Append a pnetid to the end of the pnet table if not already on this list. 
*/ static int smc_pnet_enter(struct net *net, struct nlattr *tb[]) { char pnet_name[SMC_MAX_PNETID_LEN + 1]; struct smc_pnettable *pnettable; bool new_netdev = false; bool new_ibdev = false; struct smc_net *sn; u8 ibport = 1; char *string; int rc; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; rc = -EINVAL; if (!tb[SMC_PNETID_NAME]) goto error; string = (char *)nla_data(tb[SMC_PNETID_NAME]); if (!smc_pnetid_valid(string, pnet_name)) goto error; if (tb[SMC_PNETID_ETHNAME]) { string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); rc = smc_pnet_add_eth(pnettable, net, string, pnet_name); if (!rc) new_netdev = true; else if (rc != -EEXIST) goto error; } /* if this is not the initial namespace, stop here */ if (net != &init_net) return new_netdev ? 0 : -EEXIST; rc = -EINVAL; if (tb[SMC_PNETID_IBNAME]) { string = (char *)nla_data(tb[SMC_PNETID_IBNAME]); string = strim(string); if (tb[SMC_PNETID_IBPORT]) { ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]); if (ibport < 1 || ibport > SMC_MAX_PORTS) goto error; } rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name); if (!rc) new_ibdev = true; else if (rc != -EEXIST) goto error; } return (new_netdev || new_ibdev) ? 0 : -EEXIST; error: return rc; } /* Convert an smc_pnetentry to a netlink attribute sequence */ static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem) { if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) return -1; if (pnetelem->type == SMC_PNET_ETH) { if (nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->eth_name)) return -1; } else { if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) return -1; } if (pnetelem->type == SMC_PNET_IB) { if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) || nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) return -1; } else { if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) return -1; } return 0; } static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); return smc_pnet_enter(net, info->attrs); } static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); if (!info->attrs[SMC_PNETID_NAME]) return -EINVAL; return smc_pnet_remove_by_pnetid(net, (char *)nla_data(info->attrs[SMC_PNETID_NAME])); } static int smc_pnet_dump_start(struct netlink_callback *cb) { cb->args[0] = 0; return 0; } static int smc_pnet_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, u32 flags, struct smc_pnetentry *pnetelem) { void *hdr; hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, flags, SMC_PNETID_GET); if (!hdr) return -ENOMEM; if (smc_pnet_set_nla(skb, pnetelem) < 0) { genlmsg_cancel(skb, hdr); return -EMSGSIZE; } genlmsg_end(skb, hdr); return 0; } static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, int start_idx) { struct smc_pnettable *pnettable; struct smc_pnetentry *pnetelem; struct smc_net *sn; int idx = 0; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; /* dump pnettable entries */ mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; if (idx++ < start_idx) continue; /* if this is not the initial namespace, dump only netdev */ if (net != &init_net && pnetelem->type != SMC_PNET_ETH) continue; if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, pnetelem)) { --idx; break; } } 
mutex_unlock(&pnettable->lock); return idx; } static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx; idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NULL, cb->args[0]); cb->args[0] = idx; return skb->len; } /* Retrieve one PNETID entry */ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct sk_buff *msg; void *hdr; if (!info->attrs[SMC_PNETID_NAME]) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, nla_data(info->attrs[SMC_PNETID_NAME]), 0); /* finish multi part message and send it */ hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, NLM_F_MULTI); if (!hdr) { nlmsg_free(msg); return -EMSGSIZE; } return genlmsg_reply(msg, info); } /* Remove and delete all pnetids from pnet table. */ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); smc_pnet_remove_by_pnetid(net, NULL); return 0; } /* SMC_PNETID generic netlink operation definition */ static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .doit = smc_pnet_get, .dumpit = smc_pnet_dump, .start = smc_pnet_dump_start }, { .cmd = SMC_PNETID_ADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_add }, { .cmd = SMC_PNETID_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_del }, { .cmd = SMC_PNETID_FLUSH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_flush } }; /* SMC_PNETID family definition */ static struct genl_family smc_pnet_nl_family __ro_after_init = { .hdrsize = 0, .name = SMCR_GENL_FAMILY_NAME, .version = SMCR_GENL_FAMILY_VERSION, .maxattr = SMC_PNETID_MAX, .policy = smc_pnet_policy, .netnsok = true, .module = THIS_MODULE, .ops = smc_pnet_ops, .n_ops = ARRAY_SIZE(smc_pnet_ops), .resv_start_op = SMC_PNETID_FLUSH + 1, }; bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnetids_ndev_entry *pe; bool rc = false; read_lock(&sn->pnetids_ndev.lock); list_for_each_entry(pe, &sn->pnetids_ndev.list, list) { if (smc_pnet_match(pnetid, pe->pnetid)) { rc = true; goto unlock; } } unlock: read_unlock(&sn->pnetids_ndev.lock); return rc; } static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnetids_ndev_entry *pe, *pi; pe = kzalloc(sizeof(*pe), GFP_KERNEL); if (!pe) return -ENOMEM; write_lock(&sn->pnetids_ndev.lock); list_for_each_entry(pi, &sn->pnetids_ndev.list, list) { if (smc_pnet_match(pnetid, pe->pnetid)) { refcount_inc(&pi->refcnt); kfree(pe); goto unlock; } } refcount_set(&pe->refcnt, 1); memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN); list_add_tail(&pe->list, &sn->pnetids_ndev.list); unlock: write_unlock(&sn->pnetids_ndev.lock); return 0; } static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnetids_ndev_entry *pe, *pe2; write_lock(&sn->pnetids_ndev.lock); list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) { if (smc_pnet_match(pnetid, pe->pnetid)) { if 
(refcount_dec_and_test(&pe->refcnt)) { list_del(&pe->list); kfree(pe); } break; } } write_unlock(&sn->pnetids_ndev.lock); } static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev, u8 *ndev_pnetid) { struct net_device *base_dev; base_dev = __pnet_find_base_ndev(dev); if (base_dev->flags & IFF_UP && !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port, ndev_pnetid)) { /* add to PNETIDs list */ smc_pnet_add_pnetid(net, ndev_pnetid); } } /* create initial list of netdevice pnetids */ static void smc_pnet_create_pnetids_list(struct net *net) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; /* Newly created netns do not have devices. * Do not even acquire rtnl. */ if (list_empty(&net->dev_base_head)) return; /* Note: This might not be needed, because smc_pnet_netdev_event() * is also calling smc_pnet_add_base_pnetid() when handling * NETDEV_UP event. */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); rtnl_unlock(); } /* clean up list of netdevice pnetids */ static void smc_pnet_destroy_pnetids_list(struct net *net) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnetids_ndev_entry *pe, *temp_pe; write_lock(&sn->pnetids_ndev.lock); list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) { list_del(&pe->list); kfree(pe); } write_unlock(&sn->pnetids_ndev.lock); } static int smc_pnet_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(event_dev); u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; switch (event) { case NETDEV_REBOOT: case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); smc_ib_ndev_change(event_dev, event); return NOTIFY_OK; case NETDEV_REGISTER: smc_pnet_add_by_ndev(event_dev); smc_ib_ndev_change(event_dev, event); return NOTIFY_OK; case NETDEV_UP: smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid); return NOTIFY_OK; case NETDEV_DOWN: event_dev = __pnet_find_base_ndev(event_dev); if (!smc_pnetid_by_dev_port(event_dev->dev.parent, event_dev->dev_port, ndev_pnetid)) { /* remove from PNETIDs list */ smc_pnet_remove_pnetid(net, ndev_pnetid); } return NOTIFY_OK; default: return NOTIFY_DONE; } } static struct notifier_block smc_netdev_notifier = { .notifier_call = smc_pnet_netdev_event }; /* init network namespace */ int smc_pnet_net_init(struct net *net) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnettable *pnettable = &sn->pnettable; struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; INIT_LIST_HEAD(&pnettable->pnetlist); mutex_init(&pnettable->lock); INIT_LIST_HEAD(&pnetids_ndev->list); rwlock_init(&pnetids_ndev->lock); smc_pnet_create_pnetids_list(net); return 0; } int __init smc_pnet_init(void) { int rc; rc = genl_register_family(&smc_pnet_nl_family); if (rc) return rc; rc = register_netdevice_notifier(&smc_netdev_notifier); if (rc) genl_unregister_family(&smc_pnet_nl_family); return rc; } /* exit network namespace */ void smc_pnet_net_exit(struct net *net) { /* flush pnet table */ smc_pnet_remove_by_pnetid(net, NULL); smc_pnet_destroy_pnetids_list(net); } void smc_pnet_exit(void) { unregister_netdevice_notifier(&smc_netdev_notifier); genl_unregister_family(&smc_pnet_nl_family); } static struct net_device *__pnet_find_base_ndev(struct net_device *ndev) { int i, nest_lvl; ASSERT_RTNL(); nest_lvl = ndev->lower_level; for (i = 0; i < nest_lvl; i++) { struct list_head *lower = &ndev->adj_list.lower; if (list_empty(lower)) break; lower = 
lower->next; ndev = netdev_lower_get_next(ndev, &lower); } return ndev; } /* Determine one base device for stacked net devices. * If the lower device level contains more than one devices * (for instance with bonding slaves), just the first device * is used to reach a base device. */ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) { rtnl_lock(); ndev = __pnet_find_base_ndev(ndev); rtnl_unlock(); return ndev; } static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, u8 *pnetid) { struct smc_pnettable *pnettable; struct net *net = dev_net(ndev); struct smc_pnetentry *pnetelem; struct smc_net *sn; int rc = -ENOENT; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { /* get pnetid of netdev device */ memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); rc = 0; break; } } mutex_unlock(&pnettable->lock); return rc; } static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i, struct smc_init_info *ini) { if (!ini->check_smcrv2 && !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL, NULL)) { ini->ib_dev = ibdev; ini->ib_port = i; return 0; } if (ini->check_smcrv2 && !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2, NULL, &ini->smcrv2)) { ini->smcrv2.ib_dev_v2 = ibdev; ini->smcrv2.ib_port_v2 = i; return 0; } return -ENODEV; } /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_init_info *ini, struct smc_ib_device *known_dev, struct net *net) { struct smc_ib_device *ibdev; int i; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (ibdev == known_dev || !rdma_dev_access_netns(ibdev->ibdev, net)) continue; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && smc_ib_port_active(ibdev, i) && !test_bit(i - 1, ibdev->ports_going_away)) { if (!smc_pnet_determine_gid(ibdev, i, ini)) goto out; } } } out: mutex_unlock(&smc_ib_devices.mutex); } /* find alternate roce device with same pnet_id, vlan_id and net namespace */ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, struct smc_init_info *ini, struct smc_ib_device *known_dev) { struct net *net = lgr->net; _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, net); } /* if handshake network device belongs to a roce device, return its * IB device and port */ static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct smc_init_info *ini) { struct net *net = dev_net(netdev); struct smc_ib_device *ibdev; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { struct net_device *ndev; int i; /* check rdma net namespace */ if (!rdma_dev_access_netns(ibdev->ibdev, net)) continue; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; if (!ibdev->ibdev->ops.get_netdev) continue; ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); if (!ndev) continue; dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && !test_bit(i - 1, ibdev->ports_going_away)) { if (!smc_pnet_determine_gid(ibdev, i, ini)) break; } } } mutex_unlock(&smc_ib_devices.mutex); } /* Determine the corresponding IB device port based on the hardware PNETID. 
* Searching stops at the first matching active IB device port with vlan_id * configured. * If nothing found, check pnetid table. * If nothing found, try to use handshake device */ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net *net; ndev = pnet_find_base_ndev(ndev); net = dev_net(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net); } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct smcd_dev *ismdev; ndev = pnet_find_base_ndev(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */ mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away && (!ini->ism_peer_gid[0].gid || !smc_ism_cantalk(&ini->ism_peer_gid[0], ini->vlan_id, ismdev))) { ini->ism_dev[0] = ismdev; break; } } mutex_unlock(&smcd_dev_list.mutex); } /* PNET table analysis for a given sock: * determine ib_device and port belonging to used internal TCP socket * ethernet interface. */ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); if (!dst) goto out; if (!dst->dev) goto out_rel; smc_pnet_find_roce_by_pnetid(dst->dev, ini); out_rel: dst_release(dst); out: return; } void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); ini->ism_dev[0] = NULL; if (!dst) goto out; if (!dst->dev) goto out_rel; smc_pnet_find_ism_by_pnetid(dst->dev, ini); out_rel: dst_release(dst); out: return; } /* Lookup and apply a pnet table entry to the given ib device. */ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) { char *ib_name = smcibdev->ibdev->name; struct smc_pnettable *pnettable; struct smc_pnetentry *tmp_pe; struct smc_net *sn; int rc = -ENOENT; /* get pnettable for init namespace */ sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && tmp_pe->ib_port == ib_port) { smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name); rc = 0; break; } } mutex_unlock(&pnettable->lock); return rc; } /* Lookup and apply a pnet table entry to the given smcd device. */ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) { const char *ib_name = dev_name(smcddev->ops->get_dev(smcddev)); struct smc_pnettable *pnettable; struct smc_pnetentry *tmp_pe; struct smc_net *sn; int rc = -ENOENT; /* get pnettable for init namespace */ sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name); rc = 0; break; } } mutex_unlock(&pnettable->lock); return rc; } |
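/*
 * Standalone illustration (not part of the kernel sources above): the pnet
 * table code relies on smc_pnet_match(), which is defined earlier in
 * smc_pnet.c and is not shown in this excerpt.  The sketch below captures
 * one plausible reading of the convention its callers depend on -- PNETIDs
 * live in fixed-width buffers where NUL and blank bytes act as padding.
 * The helper names, the 16-byte width (assumed for SMC_MAX_PNETID_LEN) and
 * the exact padding rule are assumptions made for illustration only, not
 * the kernel implementation.
 */
#include <stdbool.h>
#include <stddef.h>

#define PNETID_LEN 16			/* assumed value of SMC_MAX_PNETID_LEN */

static bool pnetid_is_pad(unsigned char c)
{
	return c == '\0' || c == ' ';	/* unset or blank-padded byte */
}

/* Compare two fixed-width PNETIDs, ignoring padding bytes.
 * Under these assumptions, "NET1" stored NUL-padded and "NET1" stored
 * blank-padded to PNETID_LEN bytes compare equal.
 */
static bool pnetid_equal(const unsigned char *a, const unsigned char *b)
{
	for (size_t i = 0; i < PNETID_LEN; i++) {
		if (pnetid_is_pad(a[i]) && pnetid_is_pad(b[i]))
			continue;
		if (a[i] != b[i])
			return false;
	}
	return true;
}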
// SPDX-License-Identifier: GPL-1.0+ /* * n_tty.c --- implements the N_TTY line discipline. * * This code used to be in tty_io.c, but things are getting hairy * enough that it made sense to split things off. (The N_TTY * processing has changed so much that it's hardly recognizable, * anyway...) * * Note that the open routine for N_TTY is guaranteed never to return * an error. This is because Linux will fall back to setting a line * to N_TTY if it can not switch to any other line discipline. * * Written by Theodore Ts'o, Copyright 1994. * * This file also contains code originally written by Linus Torvalds, * Copyright 1991, 1992, 1993, and by Julian Cowley, Copyright 1994. * * Reduced memory usage for older ARM systems - Russell King. * * 2000/01/20 Fixed SMP locking on put_tty_queue using bits of * the patch by Andrew J. Kroll <ag784@freenet.buffalo.edu> * who actually finally proved there really was a race. * * 2002/03/18 Implemented n_tty_wakeup to send SIGIO POLL_OUTs to * waiting writing processes-Sapan Bhatia <sapan@corewars.org>.
* Also fixed a bug in BLOCKING mode where n_tty_write returns * EAGAIN */ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/fcntl.h> #include <linux/file.h> #include <linux/jiffies.h> #include <linux/math.h> #include <linux/poll.h> #include <linux/ratelimit.h> #include <linux/sched.h> #include <linux/signal.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/tty.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include "tty.h" /* * Until this number of characters is queued in the xmit buffer, select will * return "we have room for writes". */ #define WAKEUP_CHARS 256 /* * This defines the low- and high-watermarks for throttling and * unthrottling the TTY driver. These watermarks are used for * controlling the space in the read buffer. */ #define TTY_THRESHOLD_THROTTLE 128 /* now based on remaining room */ #define TTY_THRESHOLD_UNTHROTTLE 128 /* * Special byte codes used in the echo buffer to represent operations * or special handling of characters. Bytes in the echo buffer that * are not part of such special blocks are treated as normal character * codes. */ #define ECHO_OP_START 0xff #define ECHO_OP_MOVE_BACK_COL 0x80 #define ECHO_OP_SET_CANON_COL 0x81 #define ECHO_OP_ERASE_TAB 0x82 #define ECHO_COMMIT_WATERMARK 256 #define ECHO_BLOCK 256 #define ECHO_DISCARD_WATERMARK N_TTY_BUF_SIZE - (ECHO_BLOCK + 32) #undef N_TTY_TRACE #ifdef N_TTY_TRACE # define n_tty_trace(f, args...) trace_printk(f, ##args) #else # define n_tty_trace(f, args...) no_printk(f, ##args) #endif struct n_tty_data { /* producer-published */ size_t read_head; size_t commit_head; size_t canon_head; size_t echo_head; size_t echo_commit; size_t echo_mark; DECLARE_BITMAP(char_map, 256); /* private to n_tty_receive_overrun (single-threaded) */ unsigned long overrun_time; unsigned int num_overrun; /* non-atomic */ bool no_room; /* must hold exclusive termios_rwsem to reset these */ unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; unsigned char push:1; /* shared by producer and consumer */ u8 read_buf[N_TTY_BUF_SIZE]; DECLARE_BITMAP(read_flags, N_TTY_BUF_SIZE); u8 echo_buf[N_TTY_BUF_SIZE]; /* consumer-published */ size_t read_tail; size_t line_start; /* # of chars looked ahead (to find software flow control chars) */ size_t lookahead_count; /* protected by output lock */ unsigned int column; unsigned int canon_column; size_t echo_tail; struct mutex atomic_read_lock; struct mutex output_lock; }; #define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1)) static inline size_t read_cnt(struct n_tty_data *ldata) { return ldata->read_head - ldata->read_tail; } static inline u8 read_buf(struct n_tty_data *ldata, size_t i) { return ldata->read_buf[MASK(i)]; } static inline u8 *read_buf_addr(struct n_tty_data *ldata, size_t i) { return &ldata->read_buf[MASK(i)]; } static inline u8 echo_buf(struct n_tty_data *ldata, size_t i) { smp_rmb(); /* Matches smp_wmb() in add_echo_byte(). 
*/ return ldata->echo_buf[MASK(i)]; } static inline u8 *echo_buf_addr(struct n_tty_data *ldata, size_t i) { return &ldata->echo_buf[MASK(i)]; } /* If we are not echoing the data, perhaps this is a secret so erase it */ static void zero_buffer(const struct tty_struct *tty, u8 *buffer, size_t size) { if (L_ICANON(tty) && !L_ECHO(tty)) memset(buffer, 0, size); } static void tty_copy(const struct tty_struct *tty, void *to, size_t tail, size_t n) { struct n_tty_data *ldata = tty->disc_data; size_t size = N_TTY_BUF_SIZE - tail; void *from = read_buf_addr(ldata, tail); if (n > size) { tty_audit_add_data(tty, from, size); memcpy(to, from, size); zero_buffer(tty, from, size); to += size; n -= size; from = ldata->read_buf; } tty_audit_add_data(tty, from, n); memcpy(to, from, n); zero_buffer(tty, from, n); } /** * n_tty_kick_worker - start input worker (if required) * @tty: terminal * * Re-schedules the flip buffer work if it may have stopped. * * Locking: * * Caller holds exclusive %termios_rwsem, or * * n_tty_read()/consumer path: * holds non-exclusive %termios_rwsem */ static void n_tty_kick_worker(const struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; /* Did the input worker stop? Restart it */ if (unlikely(READ_ONCE(ldata->no_room))) { WRITE_ONCE(ldata->no_room, 0); WARN_RATELIMIT(tty->port->itty == NULL, "scheduling with invalid itty\n"); /* see if ldisc has been killed - if so, this means that * even though the ldisc has been halted and ->buf.work * cancelled, ->buf.work is about to be rescheduled */ WARN_RATELIMIT(test_bit(TTY_LDISC_HALTED, &tty->flags), "scheduling buffer work for halted ldisc\n"); tty_buffer_restart_work(tty->port); } } static ssize_t chars_in_buffer(const struct tty_struct *tty) { const struct n_tty_data *ldata = tty->disc_data; size_t head = ldata->icanon ? ldata->canon_head : ldata->commit_head; return head - ldata->read_tail; } /** * n_tty_write_wakeup - asynchronous I/O notifier * @tty: tty device * * Required for the ptys, serial driver etc. since processes that attach * themselves to the master and rely on ASYNC IO must be woken up. */ static void n_tty_write_wakeup(struct tty_struct *tty) { clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); kill_fasync(&tty->fasync, SIGIO, POLL_OUT); } static void n_tty_check_throttle(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; /* * Check the remaining room for the input canonicalization * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ if (ldata->icanon && ldata->canon_head == ldata->read_tail) return; do { tty_set_flow_change(tty, TTY_THROTTLE_SAFE); if (N_TTY_BUF_SIZE - read_cnt(ldata) >= TTY_THRESHOLD_THROTTLE) break; } while (!tty_throttle_safe(tty)); __tty_set_flow_change(tty, 0); } static void n_tty_check_unthrottle(struct tty_struct *tty) { if (tty->driver->type == TTY_DRIVER_TYPE_PTY) { if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE) return; n_tty_kick_worker(tty); tty_wakeup(tty->link); return; } /* If there is enough space in the read buffer now, let the * low-level driver know. We use chars_in_buffer() to * check the buffer, as it now knows about canonical mode. * Otherwise, if the driver is throttled and the line is * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. 
*/ do { tty_set_flow_change(tty, TTY_UNTHROTTLE_SAFE); if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE) break; n_tty_kick_worker(tty); } while (!tty_unthrottle_safe(tty)); __tty_set_flow_change(tty, 0); } /** * put_tty_queue - add character to tty * @c: character * @ldata: n_tty data * * Add a character to the tty read_buf queue. * * Locking: * * n_tty_receive_buf()/producer path: * caller holds non-exclusive %termios_rwsem */ static inline void put_tty_queue(u8 c, struct n_tty_data *ldata) { *read_buf_addr(ldata, ldata->read_head) = c; ldata->read_head++; } /** * reset_buffer_flags - reset buffer state * @ldata: line disc data to reset * * Reset the read buffer counters and clear the flags. Called from * n_tty_open() and n_tty_flush_buffer(). * * Locking: * * caller holds exclusive %termios_rwsem, or * * (locking is not required) */ static void reset_buffer_flags(struct n_tty_data *ldata) { ldata->read_head = ldata->canon_head = ldata->read_tail = 0; ldata->commit_head = 0; ldata->line_start = 0; ldata->erasing = 0; bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->push = 0; ldata->lookahead_count = 0; } static void n_tty_packet_mode_flush(struct tty_struct *tty) { unsigned long flags; if (tty->link->ctrl.packet) { spin_lock_irqsave(&tty->ctrl.lock, flags); tty->ctrl.pktstatus |= TIOCPKT_FLUSHREAD; spin_unlock_irqrestore(&tty->ctrl.lock, flags); wake_up_interruptible(&tty->link->read_wait); } } /** * n_tty_flush_buffer - clean input queue * @tty: terminal device * * Flush the input buffer. Called when the tty layer wants the buffer flushed * (eg at hangup) or when the %N_TTY line discipline internally has to clean * the pending queue (for example some signals). * * Holds %termios_rwsem to exclude producer/consumer while buffer indices are * reset. * * Locking: %ctrl.lock, exclusive %termios_rwsem */ static void n_tty_flush_buffer(struct tty_struct *tty) { down_write(&tty->termios_rwsem); reset_buffer_flags(tty->disc_data); n_tty_kick_worker(tty); if (tty->link) n_tty_packet_mode_flush(tty); up_write(&tty->termios_rwsem); } /** * is_utf8_continuation - utf8 multibyte check * @c: byte to check * * Returns: true if the utf8 character @c is a multibyte continuation * character. We use this to correctly compute the on-screen size of the * character when printing. */ static inline int is_utf8_continuation(u8 c) { return (c & 0xc0) == 0x80; } /** * is_continuation - multibyte check * @c: byte to check * @tty: terminal device * * Returns: true if the utf8 character @c is a multibyte continuation character * and the terminal is in unicode mode. */ static inline int is_continuation(u8 c, const struct tty_struct *tty) { return I_IUTF8(tty) && is_utf8_continuation(c); } /** * do_output_char - output one character * @c: character (or partial unicode symbol) * @tty: terminal device * @space: space available in tty driver write buffer * * This is a helper function that handles one output character (including * special characters like TAB, CR, LF, etc.), doing OPOST processing and * putting the results in the tty driver's write buffer. * * Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY and NLDLY. * They simply aren't relevant in the world today. If you ever need them, add * them here. * * Returns: the number of bytes of buffer space used or -1 if no space left. * * Locking: should be called under the %output_lock to protect the column state * and space left in the buffer. 
*/ static int do_output_char(u8 c, struct tty_struct *tty, int space) { struct n_tty_data *ldata = tty->disc_data; int spaces; if (!space) return -1; switch (c) { case '\n': if (O_ONLRET(tty)) ldata->column = 0; if (O_ONLCR(tty)) { if (space < 2) return -1; ldata->canon_column = ldata->column = 0; tty->ops->write(tty, "\r\n", 2); return 2; } ldata->canon_column = ldata->column; break; case '\r': if (O_ONOCR(tty) && ldata->column == 0) return 0; if (O_OCRNL(tty)) { c = '\n'; if (O_ONLRET(tty)) ldata->canon_column = ldata->column = 0; break; } ldata->canon_column = ldata->column = 0; break; case '\t': spaces = 8 - (ldata->column & 7); if (O_TABDLY(tty) == XTABS) { if (space < spaces) return -1; ldata->column += spaces; tty->ops->write(tty, " ", spaces); return spaces; } ldata->column += spaces; break; case '\b': if (ldata->column > 0) ldata->column--; break; default: if (!iscntrl(c)) { if (O_OLCUC(tty)) c = toupper(c); if (!is_continuation(c, tty)) ldata->column++; } break; } tty_put_char(tty, c); return 1; } /** * process_output - output post processor * @c: character (or partial unicode symbol) * @tty: terminal device * * Output one character with OPOST processing. * * Returns: -1 when the output device is full and the character must be * retried. * * Locking: %output_lock to protect column state and space left (also, this is *called from n_tty_write() under the tty layer write lock). */ static int process_output(u8 c, struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; int space, retval; mutex_lock(&ldata->output_lock); space = tty_write_room(tty); retval = do_output_char(c, tty, space); mutex_unlock(&ldata->output_lock); if (retval < 0) return -1; else return 0; } /** * process_output_block - block post processor * @tty: terminal device * @buf: character buffer * @nr: number of bytes to output * * Output a block of characters with OPOST processing. * * This path is used to speed up block console writes, among other things when * processing blocks of output data. It handles only the simple cases normally * found and helps to generate blocks of symbols for the console driver and * thus improve performance. * * Returns: the number of characters output. * * Locking: %output_lock to protect column state and space left (also, this is * called from n_tty_write() under the tty layer write lock). 
*/ static ssize_t process_output_block(struct tty_struct *tty, const u8 *buf, unsigned int nr) { struct n_tty_data *ldata = tty->disc_data; int space; int i; const u8 *cp; mutex_lock(&ldata->output_lock); space = tty_write_room(tty); if (space <= 0) { mutex_unlock(&ldata->output_lock); return space; } if (nr > space) nr = space; for (i = 0, cp = buf; i < nr; i++, cp++) { u8 c = *cp; switch (c) { case '\n': if (O_ONLRET(tty)) ldata->column = 0; if (O_ONLCR(tty)) goto break_out; ldata->canon_column = ldata->column; break; case '\r': if (O_ONOCR(tty) && ldata->column == 0) goto break_out; if (O_OCRNL(tty)) goto break_out; ldata->canon_column = ldata->column = 0; break; case '\t': goto break_out; case '\b': if (ldata->column > 0) ldata->column--; break; default: if (!iscntrl(c)) { if (O_OLCUC(tty)) goto break_out; if (!is_continuation(c, tty)) ldata->column++; } break; } } break_out: i = tty->ops->write(tty, buf, i); mutex_unlock(&ldata->output_lock); return i; } static int n_tty_process_echo_ops(struct tty_struct *tty, size_t *tail, int space) { struct n_tty_data *ldata = tty->disc_data; u8 op; /* * Since add_echo_byte() is called without holding output_lock, we * might see only portion of multi-byte operation. */ if (MASK(ldata->echo_commit) == MASK(*tail + 1)) return -ENODATA; /* * If the buffer byte is the start of a multi-byte operation, get the * next byte, which is either the op code or a control character value. */ op = echo_buf(ldata, *tail + 1); switch (op) { case ECHO_OP_ERASE_TAB: { unsigned int num_chars, num_bs; if (MASK(ldata->echo_commit) == MASK(*tail + 2)) return -ENODATA; num_chars = echo_buf(ldata, *tail + 2); /* * Determine how many columns to go back in order to erase the * tab. This depends on the number of columns used by other * characters within the tab area. If this (modulo 8) count is * from the start of input rather than from a previous tab, we * offset by canon column. Otherwise, tab spacing is normal. */ if (!(num_chars & 0x80)) num_chars += ldata->canon_column; num_bs = 8 - (num_chars & 7); if (num_bs > space) return -ENOSPC; space -= num_bs; while (num_bs--) { tty_put_char(tty, '\b'); if (ldata->column > 0) ldata->column--; } *tail += 3; break; } case ECHO_OP_SET_CANON_COL: ldata->canon_column = ldata->column; *tail += 2; break; case ECHO_OP_MOVE_BACK_COL: if (ldata->column > 0) ldata->column--; *tail += 2; break; case ECHO_OP_START: /* This is an escaped echo op start code */ if (!space) return -ENOSPC; tty_put_char(tty, ECHO_OP_START); ldata->column++; space--; *tail += 2; break; default: /* * If the op is not a special byte code, it is a ctrl char * tagged to be echoed as "^X" (where X is the letter * representing the control char). Note that we must ensure * there is enough space for the whole ctrl pair. */ if (space < 2) return -ENOSPC; tty_put_char(tty, '^'); tty_put_char(tty, op ^ 0100); ldata->column += 2; space -= 2; *tail += 2; break; } return space; } /** * __process_echoes - write pending echo characters * @tty: terminal device * * Write previously buffered echo (and other ldisc-generated) characters to the * tty. * * Characters generated by the ldisc (including echoes) need to be buffered * because the driver's write buffer can fill during heavy program output. * Echoing straight to the driver will often fail under these conditions, * causing lost characters and resulting mismatches of ldisc state information. 
* * Since the ldisc state must represent the characters actually sent to the * driver at the time of the write, operations like certain changes in column * state are also saved in the buffer and executed here. * * A circular fifo buffer is used so that the most recent characters are * prioritized. Also, when control characters are echoed with a prefixed "^", * the pair is treated atomically and thus not separated. * * Locking: callers must hold %output_lock. */ static size_t __process_echoes(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; int space, old_space; size_t tail; u8 c; old_space = space = tty_write_room(tty); tail = ldata->echo_tail; while (MASK(ldata->echo_commit) != MASK(tail)) { c = echo_buf(ldata, tail); if (c == ECHO_OP_START) { int ret = n_tty_process_echo_ops(tty, &tail, space); if (ret == -ENODATA) goto not_yet_stored; if (ret < 0) break; space = ret; } else { if (O_OPOST(tty)) { int retval = do_output_char(c, tty, space); if (retval < 0) break; space -= retval; } else { if (!space) break; tty_put_char(tty, c); space -= 1; } tail += 1; } } /* If the echo buffer is nearly full (so that the possibility exists * of echo overrun before the next commit), then discard enough * data at the tail to prevent a subsequent overrun */ while (ldata->echo_commit > tail && ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) { if (echo_buf(ldata, tail) == ECHO_OP_START) { if (echo_buf(ldata, tail + 1) == ECHO_OP_ERASE_TAB) tail += 3; else tail += 2; } else tail++; } not_yet_stored: ldata->echo_tail = tail; return old_space - space; } static void commit_echoes(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; size_t nr, old, echoed; size_t head; mutex_lock(&ldata->output_lock); head = ldata->echo_head; ldata->echo_mark = head; old = ldata->echo_commit - ldata->echo_tail; /* Process committed echoes if the accumulated # of bytes * is over the threshold (and try again each time another * block is accumulated) */ nr = head - ldata->echo_tail; if (nr < ECHO_COMMIT_WATERMARK || (nr % ECHO_BLOCK > old % ECHO_BLOCK)) { mutex_unlock(&ldata->output_lock); return; } ldata->echo_commit = head; echoed = __process_echoes(tty); mutex_unlock(&ldata->output_lock); if (echoed && tty->ops->flush_chars) tty->ops->flush_chars(tty); } static void process_echoes(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; size_t echoed; if (ldata->echo_mark == ldata->echo_tail) return; mutex_lock(&ldata->output_lock); ldata->echo_commit = ldata->echo_mark; echoed = __process_echoes(tty); mutex_unlock(&ldata->output_lock); if (echoed && tty->ops->flush_chars) tty->ops->flush_chars(tty); } /* NB: echo_mark and echo_head should be equivalent here */ static void flush_echoes(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; if ((!L_ECHO(tty) && !L_ECHONL(tty)) || ldata->echo_commit == ldata->echo_head) return; mutex_lock(&ldata->output_lock); ldata->echo_commit = ldata->echo_head; __process_echoes(tty); mutex_unlock(&ldata->output_lock); } /** * add_echo_byte - add a byte to the echo buffer * @c: unicode byte to echo * @ldata: n_tty data * * Add a character or operation byte to the echo buffer. */ static inline void add_echo_byte(u8 c, struct n_tty_data *ldata) { *echo_buf_addr(ldata, ldata->echo_head) = c; smp_wmb(); /* Matches smp_rmb() in echo_buf(). */ ldata->echo_head++; } /** * echo_move_back_col - add operation to move back a column * @ldata: n_tty data * * Add an operation to the echo buffer to move back one column. 
*/ static void echo_move_back_col(struct n_tty_data *ldata) { add_echo_byte(ECHO_OP_START, ldata); add_echo_byte(ECHO_OP_MOVE_BACK_COL, ldata); } /** * echo_set_canon_col - add operation to set the canon column * @ldata: n_tty data * * Add an operation to the echo buffer to set the canon column to the current * column. */ static void echo_set_canon_col(struct n_tty_data *ldata) { add_echo_byte(ECHO_OP_START, ldata); add_echo_byte(ECHO_OP_SET_CANON_COL, ldata); } /** * echo_erase_tab - add operation to erase a tab * @num_chars: number of character columns already used * @after_tab: true if num_chars starts after a previous tab * @ldata: n_tty data * * Add an operation to the echo buffer to erase a tab. * * Called by the eraser function, which knows how many character columns have * been used since either a previous tab or the start of input. This * information will be used later, along with canon column (if applicable), to * go back the correct number of columns. */ static void echo_erase_tab(unsigned int num_chars, int after_tab, struct n_tty_data *ldata) { add_echo_byte(ECHO_OP_START, ldata); add_echo_byte(ECHO_OP_ERASE_TAB, ldata); /* We only need to know this modulo 8 (tab spacing) */ num_chars &= 7; /* Set the high bit as a flag if num_chars is after a previous tab */ if (after_tab) num_chars |= 0x80; add_echo_byte(num_chars, ldata); } /** * echo_char_raw - echo a character raw * @c: unicode byte to echo * @ldata: line disc data * * Echo user input back onto the screen. This must be called only when * L_ECHO(tty) is true. Called from the &tty_driver.receive_buf() path. * * This variant does not treat control characters specially. */ static void echo_char_raw(u8 c, struct n_tty_data *ldata) { if (c == ECHO_OP_START) { add_echo_byte(ECHO_OP_START, ldata); add_echo_byte(ECHO_OP_START, ldata); } else { add_echo_byte(c, ldata); } } /** * echo_char - echo a character * @c: unicode byte to echo * @tty: terminal device * * Echo user input back onto the screen. This must be called only when * L_ECHO(tty) is true. Called from the &tty_driver.receive_buf() path. * * This variant tags control characters to be echoed as "^X" (where X is the * letter representing the control char). */ static void echo_char(u8 c, const struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; if (c == ECHO_OP_START) { add_echo_byte(ECHO_OP_START, ldata); add_echo_byte(ECHO_OP_START, ldata); } else { if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') add_echo_byte(ECHO_OP_START, ldata); add_echo_byte(c, ldata); } } /** * finish_erasing - complete erase * @ldata: n_tty data */ static inline void finish_erasing(struct n_tty_data *ldata) { if (ldata->erasing) { echo_char_raw('/', ldata); ldata->erasing = 0; } } /** * eraser - handle erase function * @c: character input * @tty: terminal device * * Perform erase and necessary output when an erase character is present in the * stream from the driver layer. Handles the complexities of UTF-8 multibyte * symbols. * * Locking: n_tty_receive_buf()/producer path: * caller holds non-exclusive %termios_rwsem */ static void eraser(u8 c, const struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; enum { ERASE, WERASE, KILL } kill_type; size_t head; size_t cnt; int seen_alnums; if (ldata->read_head == ldata->canon_head) { /* process_output('\a', tty); */ /* what do you think? 
*/ return; } if (c == ERASE_CHAR(tty)) kill_type = ERASE; else if (c == WERASE_CHAR(tty)) kill_type = WERASE; else { if (!L_ECHO(tty)) { ldata->read_head = ldata->canon_head; return; } if (!L_ECHOK(tty) || !L_ECHOKE(tty) || !L_ECHOE(tty)) { ldata->read_head = ldata->canon_head; finish_erasing(ldata); echo_char(KILL_CHAR(tty), tty); /* Add a newline if ECHOK is on and ECHOKE is off. */ if (L_ECHOK(tty)) echo_char_raw('\n', ldata); return; } kill_type = KILL; } seen_alnums = 0; while (MASK(ldata->read_head) != MASK(ldata->canon_head)) { head = ldata->read_head; /* erase a single possibly multibyte character */ do { head--; c = read_buf(ldata, head); } while (is_continuation(c, tty) && MASK(head) != MASK(ldata->canon_head)); /* do not partially erase */ if (is_continuation(c, tty)) break; if (kill_type == WERASE) { /* Equivalent to BSD's ALTWERASE. */ if (isalnum(c) || c == '_') seen_alnums++; else if (seen_alnums) break; } cnt = ldata->read_head - head; ldata->read_head = head; if (L_ECHO(tty)) { if (L_ECHOPRT(tty)) { if (!ldata->erasing) { echo_char_raw('\\', ldata); ldata->erasing = 1; } /* if cnt > 1, output a multi-byte character */ echo_char(c, tty); while (--cnt > 0) { head++; echo_char_raw(read_buf(ldata, head), ldata); echo_move_back_col(ldata); } } else if (kill_type == ERASE && !L_ECHOE(tty)) { echo_char(ERASE_CHAR(tty), tty); } else if (c == '\t') { unsigned int num_chars = 0; int after_tab = 0; size_t tail = ldata->read_head; /* * Count the columns used for characters * since the start of input or after a * previous tab. * This info is used to go back the correct * number of columns. */ while (MASK(tail) != MASK(ldata->canon_head)) { tail--; c = read_buf(ldata, tail); if (c == '\t') { after_tab = 1; break; } else if (iscntrl(c)) { if (L_ECHOCTL(tty)) num_chars += 2; } else if (!is_continuation(c, tty)) { num_chars++; } } echo_erase_tab(num_chars, after_tab, ldata); } else { if (iscntrl(c) && L_ECHOCTL(tty)) { echo_char_raw('\b', ldata); echo_char_raw(' ', ldata); echo_char_raw('\b', ldata); } if (!iscntrl(c) || L_ECHOCTL(tty)) { echo_char_raw('\b', ldata); echo_char_raw(' ', ldata); echo_char_raw('\b', ldata); } } } if (kill_type == ERASE) break; } if (ldata->read_head == ldata->canon_head && L_ECHO(tty)) finish_erasing(ldata); } static void __isig(int sig, struct tty_struct *tty) { struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, sig, 1); put_pid(tty_pgrp); } } /** * isig - handle the ISIG optio * @sig: signal * @tty: terminal * * Called when a signal is being sent due to terminal input. Called from the * &tty_driver.receive_buf() path, so serialized. * * Performs input and output flush if !NOFLSH. In this context, the echo * buffer is 'output'. The signal is processed first to alert any current * readers or writers to discontinue and exit their i/o loops. 
* * Locking: %ctrl.lock */ static void isig(int sig, struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; if (L_NOFLSH(tty)) { /* signal only */ __isig(sig, tty); } else { /* signal and flush */ up_read(&tty->termios_rwsem); down_write(&tty->termios_rwsem); __isig(sig, tty); /* clear echo buffer */ mutex_lock(&ldata->output_lock); ldata->echo_head = ldata->echo_tail = 0; ldata->echo_mark = ldata->echo_commit = 0; mutex_unlock(&ldata->output_lock); /* clear output buffer */ tty_driver_flush_buffer(tty); /* clear input buffer */ reset_buffer_flags(tty->disc_data); /* notify pty master of flush */ if (tty->link) n_tty_packet_mode_flush(tty); up_write(&tty->termios_rwsem); down_read(&tty->termios_rwsem); } } /** * n_tty_receive_break - handle break * @tty: terminal * * An RS232 break event has been hit in the incoming bitstream. This can cause * a variety of events depending upon the termios settings. * * Locking: n_tty_receive_buf()/producer path: * caller holds non-exclusive termios_rwsem * * Note: may get exclusive %termios_rwsem if flushing input buffer */ static void n_tty_receive_break(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; if (I_IGNBRK(tty)) return; if (I_BRKINT(tty)) { isig(SIGINT, tty); return; } if (I_PARMRK(tty)) { put_tty_queue('\377', ldata); put_tty_queue('\0', ldata); } put_tty_queue('\0', ldata); } /** * n_tty_receive_overrun - handle overrun reporting * @tty: terminal * * Data arrived faster than we could process it. While the tty driver has * flagged this the bits that were missed are gone forever. * * Called from the receive_buf path so single threaded. Does not need locking * as num_overrun and overrun_time are function private. */ static void n_tty_receive_overrun(const struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; ldata->num_overrun++; if (time_is_before_jiffies(ldata->overrun_time + HZ)) { tty_warn(tty, "%u input overrun(s)\n", ldata->num_overrun); ldata->overrun_time = jiffies; ldata->num_overrun = 0; } } /** * n_tty_receive_parity_error - error notifier * @tty: terminal device * @c: character * * Process a parity error and queue the right data to indicate the error case * if necessary. * * Locking: n_tty_receive_buf()/producer path: * caller holds non-exclusive %termios_rwsem */ static void n_tty_receive_parity_error(const struct tty_struct *tty, u8 c) { struct n_tty_data *ldata = tty->disc_data; if (I_INPCK(tty)) { if (I_IGNPAR(tty)) return; if (I_PARMRK(tty)) { put_tty_queue('\377', ldata); put_tty_queue('\0', ldata); put_tty_queue(c, ldata); } else put_tty_queue('\0', ldata); } else put_tty_queue(c, ldata); } static void n_tty_receive_signal_char(struct tty_struct *tty, int signal, u8 c) { isig(signal, tty); if (I_IXON(tty)) start_tty(tty); if (L_ECHO(tty)) { echo_char(c, tty); commit_echoes(tty); } else process_echoes(tty); } static bool n_tty_is_char_flow_ctrl(struct tty_struct *tty, u8 c) { return c == START_CHAR(tty) || c == STOP_CHAR(tty); } /** * n_tty_receive_char_flow_ctrl - receive flow control chars * @tty: terminal device * @c: character * @lookahead_done: lookahead has processed this character already * * Receive and process flow control character actions. * * In case lookahead for flow control chars already handled the character in * advance to the normal receive, the actions are skipped during normal * receive. * * Returns true if @c is consumed as flow-control character, the character * must not be treated as normal character. 
*/ static bool n_tty_receive_char_flow_ctrl(struct tty_struct *tty, u8 c, bool lookahead_done) { if (!n_tty_is_char_flow_ctrl(tty, c)) return false; if (lookahead_done) return true; if (c == START_CHAR(tty)) { start_tty(tty); process_echoes(tty); return true; } /* STOP_CHAR */ stop_tty(tty); return true; } static void n_tty_receive_handle_newline(struct tty_struct *tty, u8 c) { struct n_tty_data *ldata = tty->disc_data; set_bit(MASK(ldata->read_head), ldata->read_flags); put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); } static bool n_tty_receive_char_canon(struct tty_struct *tty, u8 c) { struct n_tty_data *ldata = tty->disc_data; if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) || (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) { eraser(c, tty); commit_echoes(tty); return true; } if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) { ldata->lnext = 1; if (L_ECHO(tty)) { finish_erasing(ldata); if (L_ECHOCTL(tty)) { echo_char_raw('^', ldata); echo_char_raw('\b', ldata); commit_echoes(tty); } } return true; } if (c == REPRINT_CHAR(tty) && L_ECHO(tty) && L_IEXTEN(tty)) { size_t tail = ldata->canon_head; finish_erasing(ldata); echo_char(c, tty); echo_char_raw('\n', ldata); while (MASK(tail) != MASK(ldata->read_head)) { echo_char(read_buf(ldata, tail), tty); tail++; } commit_echoes(tty); return true; } if (c == '\n') { if (L_ECHO(tty) || L_ECHONL(tty)) { echo_char_raw('\n', ldata); commit_echoes(tty); } n_tty_receive_handle_newline(tty, c); return true; } if (c == EOF_CHAR(tty)) { c = __DISABLED_CHAR; n_tty_receive_handle_newline(tty, c); return true; } if ((c == EOL_CHAR(tty)) || (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) { /* * XXX are EOL_CHAR and EOL2_CHAR echoed?!? */ if (L_ECHO(tty)) { /* Record the column of first canon char. */ if (ldata->canon_head == ldata->read_head) echo_set_canon_col(ldata); echo_char(c, tty); commit_echoes(tty); } /* * XXX does PARMRK doubling happen for * EOL_CHAR and EOL2_CHAR? */ if (c == '\377' && I_PARMRK(tty)) put_tty_queue(c, ldata); n_tty_receive_handle_newline(tty, c); return true; } return false; } static void n_tty_receive_char_special(struct tty_struct *tty, u8 c, bool lookahead_done) { struct n_tty_data *ldata = tty->disc_data; if (I_IXON(tty) && n_tty_receive_char_flow_ctrl(tty, c, lookahead_done)) return; if (L_ISIG(tty)) { if (c == INTR_CHAR(tty)) { n_tty_receive_signal_char(tty, SIGINT, c); return; } else if (c == QUIT_CHAR(tty)) { n_tty_receive_signal_char(tty, SIGQUIT, c); return; } else if (c == SUSP_CHAR(tty)) { n_tty_receive_signal_char(tty, SIGTSTP, c); return; } } if (tty->flow.stopped && !tty->flow.tco_stopped && I_IXON(tty) && I_IXANY(tty)) { start_tty(tty); process_echoes(tty); } if (c == '\r') { if (I_IGNCR(tty)) return; if (I_ICRNL(tty)) c = '\n'; } else if (c == '\n' && I_INLCR(tty)) c = '\r'; if (ldata->icanon && n_tty_receive_char_canon(tty, c)) return; if (L_ECHO(tty)) { finish_erasing(ldata); if (c == '\n') echo_char_raw('\n', ldata); else { /* Record the column of first canon char. */ if (ldata->canon_head == ldata->read_head) echo_set_canon_col(ldata); echo_char(c, tty); } commit_echoes(tty); } /* PARMRK doubling check */ if (c == '\377' && I_PARMRK(tty)) put_tty_queue(c, ldata); put_tty_queue(c, ldata); } /** * n_tty_receive_char - perform processing * @tty: terminal device * @c: character * * Process an individual character of input received from the driver. 
This is * serialized with respect to itself by the rules for the driver above. * * Locking: n_tty_receive_buf()/producer path: * caller holds non-exclusive %termios_rwsem * publishes canon_head if canonical mode is active */ static void n_tty_receive_char(struct tty_struct *tty, u8 c) { struct n_tty_data *ldata = tty->disc_data; if (tty->flow.stopped && !tty->flow.tco_stopped && I_IXON(tty) && I_IXANY(tty)) { start_tty(tty); process_echoes(tty); } if (L_ECHO(tty)) { finish_erasing(ldata); /* Record the column of first canon char. */ if (ldata->canon_head == ldata->read_head) echo_set_canon_col(ldata); echo_char(c, tty); commit_echoes(tty); } /* PARMRK doubling check */ if (c == '\377' && I_PARMRK(tty)) put_tty_queue(c, ldata); put_tty_queue(c, ldata); } static void n_tty_receive_char_closing(struct tty_struct *tty, u8 c, bool lookahead_done) { if (I_ISTRIP(tty)) c &= 0x7f; if (I_IUCLC(tty) && L_IEXTEN(tty)) c = tolower(c); if (I_IXON(tty)) { if (!n_tty_receive_char_flow_ctrl(tty, c, lookahead_done) && tty->flow.stopped && !tty->flow.tco_stopped && I_IXANY(tty) && c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) { start_tty(tty); process_echoes(tty); } } } static void n_tty_receive_char_flagged(struct tty_struct *tty, u8 c, u8 flag) { switch (flag) { case TTY_BREAK: n_tty_receive_break(tty); break; case TTY_PARITY: case TTY_FRAME: n_tty_receive_parity_error(tty, c); break; case TTY_OVERRUN: n_tty_receive_overrun(tty); break; default: tty_err(tty, "unknown flag %u\n", flag); break; } } static void n_tty_receive_char_lnext(struct tty_struct *tty, u8 c, u8 flag) { struct n_tty_data *ldata = tty->disc_data; ldata->lnext = 0; if (likely(flag == TTY_NORMAL)) { if (I_ISTRIP(tty)) c &= 0x7f; if (I_IUCLC(tty) && L_IEXTEN(tty)) c = tolower(c); n_tty_receive_char(tty, c); } else n_tty_receive_char_flagged(tty, c, flag); } /* Caller must ensure count > 0 */ static void n_tty_lookahead_flow_ctrl(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct n_tty_data *ldata = tty->disc_data; u8 flag = TTY_NORMAL; ldata->lookahead_count += count; if (!I_IXON(tty)) return; while (count--) { if (fp) flag = *fp++; if (likely(flag == TTY_NORMAL)) n_tty_receive_char_flow_ctrl(tty, *cp, false); cp++; } } static void n_tty_receive_buf_real_raw(const struct tty_struct *tty, const u8 *cp, size_t count) { struct n_tty_data *ldata = tty->disc_data; /* handle buffer wrap-around by a loop */ for (unsigned int i = 0; i < 2; i++) { size_t head = MASK(ldata->read_head); size_t n = min(count, N_TTY_BUF_SIZE - head); memcpy(read_buf_addr(ldata, head), cp, n); ldata->read_head += n; cp += n; count -= n; } } static void n_tty_receive_buf_raw(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct n_tty_data *ldata = tty->disc_data; u8 flag = TTY_NORMAL; while (count--) { if (fp) flag = *fp++; if (likely(flag == TTY_NORMAL)) put_tty_queue(*cp++, ldata); else n_tty_receive_char_flagged(tty, *cp++, flag); } } static void n_tty_receive_buf_closing(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count, bool lookahead_done) { u8 flag = TTY_NORMAL; while (count--) { if (fp) flag = *fp++; if (likely(flag == TTY_NORMAL)) n_tty_receive_char_closing(tty, *cp++, lookahead_done); } } static void n_tty_receive_buf_standard(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count, bool lookahead_done) { struct n_tty_data *ldata = tty->disc_data; u8 flag = TTY_NORMAL; while (count--) { u8 c = *cp++; if (fp) flag = *fp++; if (ldata->lnext) { n_tty_receive_char_lnext(tty, 
c, flag); continue; } if (unlikely(flag != TTY_NORMAL)) { n_tty_receive_char_flagged(tty, c, flag); continue; } if (I_ISTRIP(tty)) c &= 0x7f; if (I_IUCLC(tty) && L_IEXTEN(tty)) c = tolower(c); if (L_EXTPROC(tty)) { put_tty_queue(c, ldata); continue; } if (test_bit(c, ldata->char_map)) n_tty_receive_char_special(tty, c, lookahead_done); else n_tty_receive_char(tty, c); } } static void __receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct n_tty_data *ldata = tty->disc_data; bool preops = I_ISTRIP(tty) || (I_IUCLC(tty) && L_IEXTEN(tty)); size_t la_count = min(ldata->lookahead_count, count); if (ldata->real_raw) n_tty_receive_buf_real_raw(tty, cp, count); else if (ldata->raw || (L_EXTPROC(tty) && !preops)) n_tty_receive_buf_raw(tty, cp, fp, count); else if (tty->closing && !L_EXTPROC(tty)) { if (la_count > 0) { n_tty_receive_buf_closing(tty, cp, fp, la_count, true); cp += la_count; if (fp) fp += la_count; count -= la_count; } if (count > 0) n_tty_receive_buf_closing(tty, cp, fp, count, false); } else { if (la_count > 0) { n_tty_receive_buf_standard(tty, cp, fp, la_count, true); cp += la_count; if (fp) fp += la_count; count -= la_count; } if (count > 0) n_tty_receive_buf_standard(tty, cp, fp, count, false); flush_echoes(tty); if (tty->ops->flush_chars) tty->ops->flush_chars(tty); } ldata->lookahead_count -= la_count; if (ldata->icanon && !L_EXTPROC(tty)) return; /* publish read_head to consumer */ smp_store_release(&ldata->commit_head, ldata->read_head); if (read_cnt(ldata)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); } } /** * n_tty_receive_buf_common - process input * @tty: device to receive input * @cp: input chars * @fp: flags for each char (if %NULL, all chars are %TTY_NORMAL) * @count: number of input chars in @cp * @flow: enable flow control * * Called by the terminal driver when a block of characters has been received. * This function must be called from soft contexts not from interrupt context. * The driver is responsible for making calls one at a time and in order (or * using flush_to_ldisc()). * * Returns: the # of input chars from @cp which were processed. * * In canonical mode, the maximum line length is 4096 chars (including the line * termination char); lines longer than 4096 chars are truncated. After 4095 * chars, input data is still processed but not stored. Overflow processing * ensures the tty can always receive more input until at least one line can be * read. * * In non-canonical mode, the read buffer will only accept 4095 chars; this * provides the necessary space for a newline char if the input mode is * switched to canonical. * * Note it is possible for the read buffer to _contain_ 4096 chars in * non-canonical mode: the read buffer could already contain the maximum canon * line of 4096 chars when the mode is switched to non-canonical. 
* * Locking: n_tty_receive_buf()/producer path: * claims non-exclusive %termios_rwsem * publishes commit_head or canon_head */ static size_t n_tty_receive_buf_common(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count, bool flow) { struct n_tty_data *ldata = tty->disc_data; size_t n, rcvd = 0; int room, overflow; down_read(&tty->termios_rwsem); do { /* * When PARMRK is set, each input char may take up to 3 chars * in the read buf; reduce the buffer space avail by 3x * * If we are doing input canonicalization, and there are no * pending newlines, let characters through without limit, so * that erase characters will be handled. Other excess * characters will be beeped. * * paired with store in *_copy_from_read_buf() -- guarantees * the consumer has loaded the data in read_buf up to the new * read_tail (so this producer will not overwrite unread data) */ size_t tail = smp_load_acquire(&ldata->read_tail); room = N_TTY_BUF_SIZE - (ldata->read_head - tail); if (I_PARMRK(tty)) room = DIV_ROUND_UP(room, 3); room--; if (room <= 0) { overflow = ldata->icanon && ldata->canon_head == tail; if (overflow && room < 0) ldata->read_head--; room = overflow; WRITE_ONCE(ldata->no_room, flow && !room); } else overflow = 0; n = min_t(size_t, count, room); if (!n) break; /* ignore parity errors if handling overflow */ if (!overflow || !fp || *fp != TTY_PARITY) __receive_buf(tty, cp, fp, n); cp += n; if (fp) fp += n; count -= n; rcvd += n; } while (!test_bit(TTY_LDISC_CHANGING, &tty->flags)); tty->receive_room = room; /* Unthrottle if handling overflow on pty */ if (tty->driver->type == TTY_DRIVER_TYPE_PTY) { if (overflow) { tty_set_flow_change(tty, TTY_UNTHROTTLE_SAFE); tty_unthrottle_safe(tty); __tty_set_flow_change(tty, 0); } } else n_tty_check_throttle(tty); if (unlikely(ldata->no_room)) { /* * Barrier here is to ensure to read the latest read_tail in * chars_in_buffer() and to make sure that read_tail is not loaded * before ldata->no_room is set. */ smp_mb(); if (!chars_in_buffer(tty)) n_tty_kick_worker(tty); } up_read(&tty->termios_rwsem); return rcvd; } static void n_tty_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { n_tty_receive_buf_common(tty, cp, fp, count, false); } static size_t n_tty_receive_buf2(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { return n_tty_receive_buf_common(tty, cp, fp, count, true); } /** * n_tty_set_termios - termios data changed * @tty: terminal * @old: previous data * * Called by the tty layer when the user changes termios flags so that the line * discipline can plan ahead. This function cannot sleep and is protected from * re-entry by the tty layer. The user is guaranteed that this function will * not be re-entered or in progress when the ldisc is closed. 
* * Locking: Caller holds @tty->termios_rwsem */ static void n_tty_set_termios(struct tty_struct *tty, const struct ktermios *old) { struct n_tty_data *ldata = tty->disc_data; if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->line_start = ldata->read_tail; if (!L_ICANON(tty) || !read_cnt(ldata)) { ldata->canon_head = ldata->read_tail; ldata->push = 0; } else { set_bit(MASK(ldata->read_head - 1), ldata->read_flags); ldata->canon_head = ldata->read_head; ldata->push = 1; } ldata->commit_head = ldata->read_head; ldata->erasing = 0; ldata->lnext = 0; } ldata->icanon = (L_ICANON(tty) != 0); if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || I_ICRNL(tty) || I_INLCR(tty) || L_ICANON(tty) || I_IXON(tty) || L_ISIG(tty) || L_ECHO(tty) || I_PARMRK(tty)) { bitmap_zero(ldata->char_map, 256); if (I_IGNCR(tty) || I_ICRNL(tty)) set_bit('\r', ldata->char_map); if (I_INLCR(tty)) set_bit('\n', ldata->char_map); if (L_ICANON(tty)) { set_bit(ERASE_CHAR(tty), ldata->char_map); set_bit(KILL_CHAR(tty), ldata->char_map); set_bit(EOF_CHAR(tty), ldata->char_map); set_bit('\n', ldata->char_map); set_bit(EOL_CHAR(tty), ldata->char_map); if (L_IEXTEN(tty)) { set_bit(WERASE_CHAR(tty), ldata->char_map); set_bit(LNEXT_CHAR(tty), ldata->char_map); set_bit(EOL2_CHAR(tty), ldata->char_map); if (L_ECHO(tty)) set_bit(REPRINT_CHAR(tty), ldata->char_map); } } if (I_IXON(tty)) { set_bit(START_CHAR(tty), ldata->char_map); set_bit(STOP_CHAR(tty), ldata->char_map); } if (L_ISIG(tty)) { set_bit(INTR_CHAR(tty), ldata->char_map); set_bit(QUIT_CHAR(tty), ldata->char_map); set_bit(SUSP_CHAR(tty), ldata->char_map); } clear_bit(__DISABLED_CHAR, ldata->char_map); ldata->raw = 0; ldata->real_raw = 0; } else { ldata->raw = 1; if ((I_IGNBRK(tty) || (!I_BRKINT(tty) && !I_PARMRK(tty))) && (I_IGNPAR(tty) || !I_INPCK(tty)) && (tty->driver->flags & TTY_DRIVER_REAL_RAW)) ldata->real_raw = 1; else ldata->real_raw = 0; } /* * Fix tty hang when I_IXON(tty) is cleared, but the tty * been stopped by STOP_CHAR(tty) before it. */ if (!I_IXON(tty) && old && (old->c_iflag & IXON) && !tty->flow.tco_stopped) { start_tty(tty); process_echoes(tty); } /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); } /** * n_tty_close - close the ldisc for this tty * @tty: device * * Called from the terminal layer when this line discipline is being shut down, * either because of a close or becsuse of a discipline change. The function * will not be called while other ldisc methods are in progress. */ static void n_tty_close(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; if (tty->link) n_tty_packet_mode_flush(tty); down_write(&tty->termios_rwsem); vfree(ldata); tty->disc_data = NULL; up_write(&tty->termios_rwsem); } /** * n_tty_open - open an ldisc * @tty: terminal to open * * Called when this line discipline is being attached to the terminal device. * Can sleep. Called serialized so that no other events will occur in parallel. * No further open will occur until a close. 
*/ static int n_tty_open(struct tty_struct *tty) { struct n_tty_data *ldata; /* Currently a malloc failure here can panic */ ldata = vzalloc(sizeof(*ldata)); if (!ldata) return -ENOMEM; ldata->overrun_time = jiffies; mutex_init(&ldata->atomic_read_lock); mutex_init(&ldata->output_lock); tty->disc_data = ldata; tty->closing = 0; /* indicate buffer work may resume */ clear_bit(TTY_LDISC_HALTED, &tty->flags); n_tty_set_termios(tty, NULL); tty_unthrottle(tty); return 0; } static inline int input_available_p(const struct tty_struct *tty, int poll) { const struct n_tty_data *ldata = tty->disc_data; int amt = poll && !TIME_CHAR(tty) && MIN_CHAR(tty) ? MIN_CHAR(tty) : 1; if (ldata->icanon && !L_EXTPROC(tty)) return ldata->canon_head != ldata->read_tail; else return ldata->commit_head - ldata->read_tail >= amt; } /** * copy_from_read_buf - copy read data directly * @tty: terminal device * @kbp: data * @nr: size of data * * Helper function to speed up n_tty_read(). It is only called when %ICANON is * off; it copies characters straight from the tty queue. * * Returns: true if it successfully copied data, but there is still more data * to be had. * * Locking: * * called under the @ldata->atomic_read_lock sem * * n_tty_read()/consumer path: * caller holds non-exclusive %termios_rwsem; * read_tail published */ static bool copy_from_read_buf(const struct tty_struct *tty, u8 **kbp, size_t *nr) { struct n_tty_data *ldata = tty->disc_data; size_t n; bool is_eof; size_t head = smp_load_acquire(&ldata->commit_head); size_t tail = MASK(ldata->read_tail); n = min3(head - ldata->read_tail, N_TTY_BUF_SIZE - tail, *nr); if (!n) return false; u8 *from = read_buf_addr(ldata, tail); memcpy(*kbp, from, n); is_eof = n == 1 && *from == EOF_CHAR(tty); tty_audit_add_data(tty, from, n); zero_buffer(tty, from, n); smp_store_release(&ldata->read_tail, ldata->read_tail + n); /* Turn single EOF into zero-length read */ if (L_EXTPROC(tty) && ldata->icanon && is_eof && head == ldata->read_tail) return false; *kbp += n; *nr -= n; /* If we have more to copy, let the caller know */ return head != ldata->read_tail; } /** * canon_copy_from_read_buf - copy read data in canonical mode * @tty: terminal device * @kbp: data * @nr: size of data * * Helper function for n_tty_read(). It is only called when %ICANON is on; it * copies one line of input up to and including the line-delimiting character * into the result buffer. * * Note: When termios is changed from non-canonical to canonical mode and the * read buffer contains data, n_tty_set_termios() simulates an EOF push (as if * C-d were input) _without_ the %DISABLED_CHAR in the buffer. This causes data * already processed as input to be immediately available as input although a * newline has not been received. * * Locking: * * called under the %atomic_read_lock mutex * * n_tty_read()/consumer path: * caller holds non-exclusive %termios_rwsem; * read_tail published */ static bool canon_copy_from_read_buf(const struct tty_struct *tty, u8 **kbp, size_t *nr) { struct n_tty_data *ldata = tty->disc_data; size_t n, size, more, c; size_t eol; size_t tail, canon_head; int found = 0; /* N.B. 
avoid overrun if nr == 0 */ if (!*nr) return false; canon_head = smp_load_acquire(&ldata->canon_head); n = min(*nr, canon_head - ldata->read_tail); tail = MASK(ldata->read_tail); size = min_t(size_t, tail + n, N_TTY_BUF_SIZE); n_tty_trace("%s: nr:%zu tail:%zu n:%zu size:%zu\n", __func__, *nr, tail, n, size); eol = find_next_bit(ldata->read_flags, size, tail); more = n - (size - tail); if (eol == N_TTY_BUF_SIZE && more) { /* scan wrapped without finding set bit */ eol = find_first_bit(ldata->read_flags, more); found = eol != more; } else found = eol != size; n = eol - tail; if (n > N_TTY_BUF_SIZE) n += N_TTY_BUF_SIZE; c = n + found; if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) n = c; n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu tail:%zu more:%zu\n", __func__, eol, found, n, c, tail, more); tty_copy(tty, *kbp, tail, n); *kbp += n; *nr -= n; if (found) clear_bit(eol, ldata->read_flags); smp_store_release(&ldata->read_tail, ldata->read_tail + c); if (found) { if (!ldata->push) ldata->line_start = ldata->read_tail; else ldata->push = 0; tty_audit_push(); return false; } /* No EOL found - do a continuation retry if there is more data */ return ldata->read_tail != canon_head; } /* * If we finished a read at the exact location of an * EOF (special EOL character that's a __DISABLED_CHAR) * in the stream, silently eat the EOF. */ static void canon_skip_eof(struct n_tty_data *ldata) { size_t tail, canon_head; canon_head = smp_load_acquire(&ldata->canon_head); tail = ldata->read_tail; // No data? if (tail == canon_head) return; // See if the tail position is EOF in the circular buffer tail &= (N_TTY_BUF_SIZE - 1); if (!test_bit(tail, ldata->read_flags)) return; if (read_buf(ldata, tail) != __DISABLED_CHAR) return; // Clear the EOL bit, skip the EOF char. clear_bit(tail, ldata->read_flags); smp_store_release(&ldata->read_tail, ldata->read_tail + 1); } /** * job_control - check job control * @tty: tty * @file: file handle * * Perform job control management checks on this @file/@tty descriptor and if * appropriate send any needed signals and return a negative error code if * action should be taken. * * Locking: * * redirected write test is safe * * current->signal->tty check is safe * * ctrl.lock to safely reference @tty->ctrl.pgrp */ static int job_control(struct tty_struct *tty, struct file *file) { /* Job control check -- must be done at start and after every sleep (POSIX.1 7.1.1.4). */ /* NOTE: not yet done after every sleep pending a thorough check of the logic of this change. -- jlc */ /* don't stop on /dev/console */ if (file->f_op->write_iter == redirected_tty_write) return 0; return __tty_check_change(tty, SIGTTIN); } /** * n_tty_read - read function for tty * @tty: tty device * @file: file object * @kbuf: kernelspace buffer pointer * @nr: size of I/O * @cookie: if non-%NULL, this is a continuation read * @offset: where to continue reading from (unused in n_tty) * * Perform reads for the line discipline. We are guaranteed that the line * discipline will not be closed under us but we may get multiple parallel * readers and must handle this ourselves. We may also get a hangup. Always * called in user context, may sleep. * * This code must be sure never to sleep through a hangup. 
* * Locking: n_tty_read()/consumer path: * claims non-exclusive termios_rwsem; * publishes read_tail */ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, u8 *kbuf, size_t nr, void **cookie, unsigned long offset) { struct n_tty_data *ldata = tty->disc_data; u8 *kb = kbuf; DEFINE_WAIT_FUNC(wait, woken_wake_function); int minimum, time; ssize_t retval; long timeout; bool packet; size_t old_tail; /* * Is this a continuation of a read started earler? * * If so, we still hold the atomic_read_lock and the * termios_rwsem, and can just continue to copy data. */ if (*cookie) { if (ldata->icanon && !L_EXTPROC(tty)) { /* * If we have filled the user buffer, see * if we should skip an EOF character before * releasing the lock and returning done. */ if (!nr) canon_skip_eof(ldata); else if (canon_copy_from_read_buf(tty, &kb, &nr)) return kb - kbuf; } else { if (copy_from_read_buf(tty, &kb, &nr)) return kb - kbuf; } /* No more data - release locks and stop retries */ n_tty_kick_worker(tty); n_tty_check_unthrottle(tty); up_read(&tty->termios_rwsem); mutex_unlock(&ldata->atomic_read_lock); *cookie = NULL; return kb - kbuf; } retval = job_control(tty, file); if (retval < 0) return retval; /* * Internal serialization of reads. */ if (file->f_flags & O_NONBLOCK) { if (!mutex_trylock(&ldata->atomic_read_lock)) return -EAGAIN; } else { if (mutex_lock_interruptible(&ldata->atomic_read_lock)) return -ERESTARTSYS; } down_read(&tty->termios_rwsem); minimum = time = 0; timeout = MAX_SCHEDULE_TIMEOUT; if (!ldata->icanon) { minimum = MIN_CHAR(tty); if (minimum) { time = (HZ / 10) * TIME_CHAR(tty); } else { timeout = (HZ / 10) * TIME_CHAR(tty); minimum = 1; } } packet = tty->ctrl.packet; old_tail = ldata->read_tail; add_wait_queue(&tty->read_wait, &wait); while (nr) { /* First test for status change. */ if (packet && tty->link->ctrl.pktstatus) { u8 cs; if (kb != kbuf) break; spin_lock_irq(&tty->link->ctrl.lock); cs = tty->link->ctrl.pktstatus; tty->link->ctrl.pktstatus = 0; spin_unlock_irq(&tty->link->ctrl.lock); *kb++ = cs; nr--; break; } if (!input_available_p(tty, 0)) { up_read(&tty->termios_rwsem); tty_buffer_flush_work(tty->port); down_read(&tty->termios_rwsem); if (!input_available_p(tty, 0)) { if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) { retval = -EIO; break; } if (tty_hung_up_p(file)) break; /* * Abort readers for ttys which never actually * get hung up. See __tty_hangup(). */ if (test_bit(TTY_HUPPING, &tty->flags)) break; if (!timeout) break; if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } if (signal_pending(current)) { retval = -ERESTARTSYS; break; } up_read(&tty->termios_rwsem); timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); down_read(&tty->termios_rwsem); continue; } } if (ldata->icanon && !L_EXTPROC(tty)) { if (canon_copy_from_read_buf(tty, &kb, &nr)) goto more_to_be_read; } else { /* Deal with packet mode. */ if (packet && kb == kbuf) { *kb++ = TIOCPKT_DATA; nr--; } /* * Copy data, and if there is more to be had * and we have nothing more to wait for, then * let's mark us for retries. * * NOTE! We return here with both the termios_sem * and atomic_read_lock still held, the retries * will release them when done. 
*/ if (copy_from_read_buf(tty, &kb, &nr) && kb - kbuf >= minimum) { more_to_be_read: remove_wait_queue(&tty->read_wait, &wait); *cookie = cookie; return kb - kbuf; } } n_tty_check_unthrottle(tty); if (kb - kbuf >= minimum) break; if (time) timeout = time; } if (old_tail != ldata->read_tail) { /* * Make sure no_room is not read in n_tty_kick_worker() * before setting ldata->read_tail in copy_from_read_buf(). */ smp_mb(); n_tty_kick_worker(tty); } up_read(&tty->termios_rwsem); remove_wait_queue(&tty->read_wait, &wait); mutex_unlock(&ldata->atomic_read_lock); if (kb - kbuf) retval = kb - kbuf; return retval; } /** * n_tty_write - write function for tty * @tty: tty device * @file: file object * @buf: userspace buffer pointer * @nr: size of I/O * * Write function of the terminal device. This is serialized with respect to * other write callers but not to termios changes, reads and other such events. * Since the receive code will echo characters, thus calling driver write * methods, the %output_lock is used in the output processing functions called * here as well as in the echo processing function to protect the column state * and space left in the buffer. * * This code must be sure never to sleep through a hangup. * * Locking: output_lock to protect column state and space left * (note that the process_output*() functions take this lock themselves) */ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t nr) { const u8 *b = buf; DEFINE_WAIT_FUNC(wait, woken_wake_function); ssize_t num, retval = 0; /* Job control check -- must be done at start (POSIX.1 7.1.1.4). */ if (L_TOSTOP(tty) && file->f_op->write_iter != redirected_tty_write) { retval = tty_check_change(tty); if (retval) return retval; } down_read(&tty->termios_rwsem); /* Write out any echoed characters that are still pending */ process_echoes(tty); add_wait_queue(&tty->write_wait, &wait); while (1) { if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (tty_hung_up_p(file) || (tty->link && !tty->link->count)) { retval = -EIO; break; } if (O_OPOST(tty)) { while (nr > 0) { num = process_output_block(tty, b, nr); if (num < 0) { if (num == -EAGAIN) break; retval = num; goto break_out; } b += num; nr -= num; if (nr == 0) break; if (process_output(*b, tty) < 0) break; b++; nr--; } if (tty->ops->flush_chars) tty->ops->flush_chars(tty); } else { struct n_tty_data *ldata = tty->disc_data; while (nr > 0) { mutex_lock(&ldata->output_lock); num = tty->ops->write(tty, b, nr); mutex_unlock(&ldata->output_lock); if (num < 0) { retval = num; goto break_out; } if (!num) break; b += num; nr -= num; } } if (!nr) break; if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } up_read(&tty->termios_rwsem); wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); down_read(&tty->termios_rwsem); } break_out: remove_wait_queue(&tty->write_wait, &wait); if (nr && tty->fasync) set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); up_read(&tty->termios_rwsem); return (b - buf) ? b - buf : retval; } /** * n_tty_poll - poll method for N_TTY * @tty: terminal device * @file: file accessing it * @wait: poll table * * Called when the line discipline is asked to poll() for data or for special * events. This code is not serialized with respect to other events save * open/close. * * This code must be sure never to sleep through a hangup. * * Locking: called without the kernel lock held -- fine. 
*/ static __poll_t n_tty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) { __poll_t mask = 0; poll_wait(file, &tty->read_wait, wait); poll_wait(file, &tty->write_wait, wait); if (input_available_p(tty, 1)) mask |= EPOLLIN | EPOLLRDNORM; else { tty_buffer_flush_work(tty->port); if (input_available_p(tty, 1)) mask |= EPOLLIN | EPOLLRDNORM; } if (tty->ctrl.packet && tty->link->ctrl.pktstatus) mask |= EPOLLPRI | EPOLLIN | EPOLLRDNORM; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= EPOLLHUP; if (tty_hung_up_p(file)) mask |= EPOLLHUP; if (tty->ops->write && !tty_is_writelocked(tty) && tty_chars_in_buffer(tty) < WAKEUP_CHARS && tty_write_room(tty) > 0) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static unsigned long inq_canon(struct n_tty_data *ldata) { size_t nr, head, tail; if (ldata->canon_head == ldata->read_tail) return 0; head = ldata->canon_head; tail = ldata->read_tail; nr = head - tail; /* Skip EOF-chars.. */ while (MASK(head) != MASK(tail)) { if (test_bit(MASK(tail), ldata->read_flags) && read_buf(ldata, tail) == __DISABLED_CHAR) nr--; tail++; } return nr; } static int n_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct n_tty_data *ldata = tty->disc_data; unsigned int num; switch (cmd) { case TIOCOUTQ: return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: down_write(&tty->termios_rwsem); if (L_ICANON(tty) && !L_EXTPROC(tty)) num = inq_canon(ldata); else num = read_cnt(ldata); up_write(&tty->termios_rwsem); return put_user(num, (unsigned int __user *) arg); default: return n_tty_ioctl_helper(tty, cmd, arg); } } static struct tty_ldisc_ops n_tty_ops = { .owner = THIS_MODULE, .num = N_TTY, .name = "n_tty", .open = n_tty_open, .close = n_tty_close, .flush_buffer = n_tty_flush_buffer, .read = n_tty_read, .write = n_tty_write, .ioctl = n_tty_ioctl, .set_termios = n_tty_set_termios, .poll = n_tty_poll, .receive_buf = n_tty_receive_buf, .write_wakeup = n_tty_write_wakeup, .receive_buf2 = n_tty_receive_buf2, .lookahead_buf = n_tty_lookahead_flow_ctrl, }; /** * n_tty_inherit_ops - inherit N_TTY methods * @ops: struct tty_ldisc_ops where to save N_TTY methods * * Enables a 'subclass' line discipline to 'inherit' N_TTY methods. */ void n_tty_inherit_ops(struct tty_ldisc_ops *ops) { *ops = n_tty_ops; ops->owner = NULL; } EXPORT_SYMBOL_GPL(n_tty_inherit_ops); void __init n_tty_init(void) { tty_register_ldisc(&n_tty_ops); } |
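/*
 * Illustrative userspace sketch (not part of n_tty.c): the MIN_CHAR()/
 * TIME_CHAR() handling in n_tty_read() and the TIOCINQ case in n_tty_ioctl()
 * are what userspace reaches through termios VMIN/VTIME and the
 * FIONREAD/TIOCINQ ioctl. Assumes stdin is a Linux tty; error handling
 * trimmed.
 */
#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <sys/ioctl.h>

int main(void)
{
	struct termios saved, raw;
	char buf[256];
	ssize_t n;
	int pending = 0;

	if (tcgetattr(STDIN_FILENO, &saved) < 0)
		return 1;

	/* Canonical mode (ICANON set): read() returns at most one line. */
	n = read(STDIN_FILENO, buf, sizeof(buf));
	printf("canonical read: %zd bytes\n", n);

	/* Non-canonical: VMIN/VTIME drive the minimum/timeout in n_tty_read(). */
	raw = saved;
	raw.c_lflag &= ~(ICANON | ECHO);
	raw.c_cc[VMIN] = 1;	/* MIN_CHAR(): block until at least one byte */
	raw.c_cc[VTIME] = 5;	/* TIME_CHAR(): 0.5 s inter-byte timer */
	tcsetattr(STDIN_FILENO, TCSANOW, &raw);

	n = read(STDIN_FILENO, buf, sizeof(buf));
	printf("raw read: %zd bytes\n", n);

	/* FIONREAD (TIOCINQ) reports bytes still queued in the line discipline. */
	ioctl(STDIN_FILENO, FIONREAD, &pending);
	printf("still queued: %d\n", pending);

	tcsetattr(STDIN_FILENO, TCSANOW, &saved);	/* restore settings */
	return 0;
}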
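/*
 * Illustrative userspace sketch (not part of the driver): the char_map tests
 * in n_tty_receive_buf_standard() only route a byte to the special-character
 * handlers when the corresponding termios flags are set. Clearing ISIG and
 * IXON therefore turns ^C/^Z/^\ and ^S/^Q back into ordinary data bytes.
 * Assumes stdin is a tty; error handling trimmed.
 */
#include <stdio.h>
#include <termios.h>
#include <unistd.h>

int main(void)
{
	struct termios saved, t;
	unsigned char c;

	if (tcgetattr(STDIN_FILENO, &saved) < 0)
		return 1;

	t = saved;
	t.c_lflag &= ~(ISIG | ICANON | ECHO);	/* signal chars become plain bytes */
	t.c_iflag &= ~IXON;			/* ^S/^Q no longer stop/start output */
	t.c_cc[VMIN] = 1;
	t.c_cc[VTIME] = 0;
	tcsetattr(STDIN_FILENO, TCSANOW, &t);

	printf("press Ctrl-C; it is read as a byte, not delivered as SIGINT\r\n");
	if (read(STDIN_FILENO, &c, 1) == 1)
		printf("got byte 0x%02x\r\n", c);

	tcsetattr(STDIN_FILENO, TCSANOW, &saved);
	return 0;
}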
// SPDX-License-Identifier: GPL-2.0
#include <linux/cpufreq.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

extern const struct seq_operations cpuinfo_op;

static int cpuinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &cpuinfo_op);
}

static const struct proc_ops cpuinfo_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_open	= cpuinfo_open,
	.proc_read_iter	= seq_read_iter,
	.proc_lseek	= seq_lseek,
	.proc_release	= seq_release,
};

static int __init proc_cpuinfo_init(void)
{
	proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops);
	return 0;
}
fs_initcall(proc_cpuinfo_init);
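/*
 * Illustrative userspace sketch (not part of the kernel code above): the
 * proc entry registered by proc_cpuinfo_init() is a plain read-only file
 * whose contents are regenerated by the seq_file machinery on every open.
 * The "model name" key filtered below is x86-specific; other architectures
 * print different fields.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/cpuinfo", "r");

	if (!f) {
		perror("/proc/cpuinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "model name", 10))
			fputs(line, stdout);
	fclose(f);
	return 0;
}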
// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 */

#include <linux/fs.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"

static inline int compare_attr(const struct ATTRIB *left, enum ATTR_TYPE type,
			       const __le16 *name, u8 name_len,
			       const u16 *upcase)
{
	/* First, compare the type codes. */
	int diff = le32_to_cpu(left->type) - le32_to_cpu(type);

	if (diff)
		return diff;

	/* They have the same type code, so we have to compare the names. */
	return ntfs_cmp_names(attr_name(left), left->name_len, name, name_len,
			      upcase, true);
}

/*
 * mi_new_attt_id
 *
 * Return: Unused attribute id that is less than mrec->next_attr_id.
*/ static __le16 mi_new_attt_id(struct mft_inode *mi) { u16 free_id, max_id, t16; struct MFT_REC *rec = mi->mrec; struct ATTRIB *attr; __le16 id; id = rec->next_attr_id; free_id = le16_to_cpu(id); if (free_id < 0x7FFF) { rec->next_attr_id = cpu_to_le16(free_id + 1); return id; } /* One record can store up to 1024/24 ~= 42 attributes. */ free_id = 0; max_id = 0; attr = NULL; for (;;) { attr = mi_enum_attr(mi, attr); if (!attr) { rec->next_attr_id = cpu_to_le16(max_id + 1); mi->dirty = true; return cpu_to_le16(free_id); } t16 = le16_to_cpu(attr->id); if (t16 == free_id) { free_id += 1; attr = NULL; } else if (max_id < t16) max_id = t16; } } int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi) { int err; struct mft_inode *m = kzalloc(sizeof(struct mft_inode), GFP_NOFS); if (!m) return -ENOMEM; err = mi_init(m, sbi, rno); if (err) { kfree(m); return err; } err = mi_read(m, false); if (err) { mi_put(m); return err; } *mi = m; return 0; } void mi_put(struct mft_inode *mi) { mi_clear(mi); kfree(mi); } int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno) { mi->sbi = sbi; mi->rno = rno; mi->mrec = kmalloc(sbi->record_size, GFP_NOFS); if (!mi->mrec) return -ENOMEM; return 0; } /* * mi_read - Read MFT data. */ int mi_read(struct mft_inode *mi, bool is_mft) { int err; struct MFT_REC *rec = mi->mrec; struct ntfs_sb_info *sbi = mi->sbi; u32 bpr = sbi->record_size; u64 vbo = (u64)mi->rno << sbi->record_bits; struct ntfs_inode *mft_ni = sbi->mft.ni; struct runs_tree *run = mft_ni ? &mft_ni->file.run : NULL; struct rw_semaphore *rw_lock = NULL; if (is_mounted(sbi)) { if (!is_mft && mft_ni) { rw_lock = &mft_ni->file.run_lock; down_read(rw_lock); } } err = ntfs_read_bh(sbi, run, vbo, &rec->rhdr, bpr, &mi->nb); if (rw_lock) up_read(rw_lock); if (!err) goto ok; if (err == -E_NTFS_FIXUP) { mi->dirty = true; goto ok; } if (err != -ENOENT) goto out; if (rw_lock) { ni_lock(mft_ni); down_write(rw_lock); } err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, run, vbo >> sbi->cluster_bits); if (rw_lock) { up_write(rw_lock); ni_unlock(mft_ni); } if (err) goto out; if (rw_lock) down_read(rw_lock); err = ntfs_read_bh(sbi, run, vbo, &rec->rhdr, bpr, &mi->nb); if (rw_lock) up_read(rw_lock); if (err == -E_NTFS_FIXUP) { mi->dirty = true; goto ok; } if (err) goto out; ok: /* Check field 'total' only here. */ if (le32_to_cpu(rec->total) != bpr) { err = -EINVAL; goto out; } return 0; out: if (err == -E_NTFS_CORRUPT) { ntfs_err(sbi->sb, "mft corrupted"); ntfs_set_state(sbi, NTFS_DIRTY_ERROR); err = -EINVAL; } return err; } /* * mi_enum_attr - start/continue attributes enumeration in record. * * NOTE: mi->mrec - memory of size sbi->record_size * here we sure that mi->mrec->total == sbi->record_size (see mi_read) */ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; u32 used = le32_to_cpu(rec->used); u32 t32, off, asize, prev_type; u16 t16; u64 data_size, alloc_size, tot_size; if (!attr) { u32 total = le32_to_cpu(rec->total); off = le16_to_cpu(rec->attr_off); if (used > total) return NULL; if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 || !IS_ALIGNED(off, 4)) { return NULL; } /* Skip non-resident records. */ if (!is_rec_inuse(rec)) return NULL; prev_type = 0; attr = Add2Ptr(rec, off); } else { /* * We don't need to check previous attr here. There is * a bounds checking in the previous round. 
*/ off = PtrOffset(rec, attr); asize = le32_to_cpu(attr->size); prev_type = le32_to_cpu(attr->type); attr = Add2Ptr(attr, asize); off += asize; } /* Can we use the first field (attr->type). */ /* NOTE: this code also checks attr->size availability. */ if (off + 8 > used) { static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8); return NULL; } if (attr->type == ATTR_END) { /* End of enumeration. */ return NULL; } /* 0x100 is last known attribute for now. */ t32 = le32_to_cpu(attr->type); if (!t32 || (t32 & 0xf) || (t32 > 0x100)) return NULL; /* attributes in record must be ordered by type */ if (t32 < prev_type) return NULL; asize = le32_to_cpu(attr->size); /* Check overflow and boundary. */ if (off + asize < off || off + asize > used) return NULL; /* Check size of attribute. */ if (!attr->non_res) { /* Check resident fields. */ if (asize < SIZEOF_RESIDENT) return NULL; t16 = le16_to_cpu(attr->res.data_off); if (t16 > asize) return NULL; if (le32_to_cpu(attr->res.data_size) > asize - t16) return NULL; t32 = sizeof(short) * attr->name_len; if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) return NULL; return attr; } /* Check nonresident fields. */ if (attr->non_res != 1) return NULL; /* Can we use memory including attr->nres.valid_size? */ if (asize < SIZEOF_NONRESIDENT) return NULL; t16 = le16_to_cpu(attr->nres.run_off); if (t16 > asize) return NULL; t32 = sizeof(short) * attr->name_len; if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) return NULL; /* Check start/end vcn. */ if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1) return NULL; data_size = le64_to_cpu(attr->nres.data_size); if (le64_to_cpu(attr->nres.valid_size) > data_size) return NULL; alloc_size = le64_to_cpu(attr->nres.alloc_size); if (data_size > alloc_size) return NULL; t32 = mi->sbi->cluster_mask; if (alloc_size & t32) return NULL; if (!attr->nres.svcn && is_attr_ext(attr)) { /* First segment of sparse/compressed attribute */ /* Can we use memory including attr->nres.total_size? */ if (asize < SIZEOF_NONRESIDENT_EX) return NULL; tot_size = le64_to_cpu(attr->nres.total_size); if (tot_size & t32) return NULL; if (tot_size > alloc_size) return NULL; } else { if (attr->nres.c_unit) return NULL; if (alloc_size > mi->sbi->volume.size) return NULL; } return attr; } /* * mi_find_attr - Find the attribute by type and name and id. 
*/ struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, enum ATTR_TYPE type, const __le16 *name, u8 name_len, const __le16 *id) { u32 type_in = le32_to_cpu(type); u32 atype; next_attr: attr = mi_enum_attr(mi, attr); if (!attr) return NULL; atype = le32_to_cpu(attr->type); if (atype > type_in) return NULL; if (atype < type_in) goto next_attr; if (attr->name_len != name_len) goto next_attr; if (name_len && memcmp(attr_name(attr), name, name_len * sizeof(short))) goto next_attr; if (id && *id != attr->id) goto next_attr; return attr; } int mi_write(struct mft_inode *mi, int wait) { struct MFT_REC *rec; int err; struct ntfs_sb_info *sbi; if (!mi->dirty) return 0; sbi = mi->sbi; rec = mi->mrec; err = ntfs_write_bh(sbi, &rec->rhdr, &mi->nb, wait); if (err) return err; if (mi->rno < sbi->mft.recs_mirr) sbi->flags |= NTFS_FLAGS_MFTMIRR; mi->dirty = false; return 0; } int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, __le16 flags, bool is_mft) { int err; u16 seq = 1; struct MFT_REC *rec; u64 vbo = (u64)rno << sbi->record_bits; err = mi_init(mi, sbi, rno); if (err) return err; rec = mi->mrec; if (rno == MFT_REC_MFT) { ; } else if (rno < MFT_REC_FREE) { seq = rno; } else if (rno >= sbi->mft.used) { ; } else if (mi_read(mi, is_mft)) { ; } else if (rec->rhdr.sign == NTFS_FILE_SIGNATURE) { /* Record is reused. Update its sequence number. */ seq = le16_to_cpu(rec->seq) + 1; if (!seq) seq = 1; } memcpy(rec, sbi->new_rec, sbi->record_size); rec->seq = cpu_to_le16(seq); rec->flags = RECORD_FLAG_IN_USE | flags; if (MFTRECORD_FIXUP_OFFSET == MFTRECORD_FIXUP_OFFSET_3) rec->mft_record = cpu_to_le32(rno); mi->dirty = true; if (!mi->nb.nbufs) { struct ntfs_inode *ni = sbi->mft.ni; bool lock = false; if (is_mounted(sbi) && !is_mft) { down_read(&ni->file.run_lock); lock = true; } err = ntfs_get_bh(sbi, &ni->file.run, vbo, sbi->record_size, &mi->nb); if (lock) up_read(&ni->file.run_lock); } return err; } /* * mi_insert_attr - Reserve space for new attribute. * * Return: Not full constructed attribute or NULL if not possible to create. */ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, const __le16 *name, u8 name_len, u32 asize, u16 name_off) { size_t tail; struct ATTRIB *attr; __le16 id; struct MFT_REC *rec = mi->mrec; struct ntfs_sb_info *sbi = mi->sbi; u32 used = le32_to_cpu(rec->used); const u16 *upcase = sbi->upcase; /* Can we insert mi attribute? */ if (used + asize > sbi->record_size) return NULL; /* * Scan through the list of attributes to find the point * at which we should insert it. */ attr = NULL; while ((attr = mi_enum_attr(mi, attr))) { int diff = compare_attr(attr, type, name, name_len, upcase); if (diff < 0) continue; if (!diff && !is_attr_indexed(attr)) return NULL; break; } if (!attr) { /* Append. */ tail = 8; attr = Add2Ptr(rec, used - 8); } else { /* Insert before 'attr'. */ tail = used - PtrOffset(rec, attr); } id = mi_new_attt_id(mi); memmove(Add2Ptr(attr, asize), attr, tail); memset(attr, 0, asize); attr->type = type; attr->size = cpu_to_le32(asize); attr->name_len = name_len; attr->name_off = cpu_to_le16(name_off); attr->id = id; memmove(Add2Ptr(attr, name_off), name, name_len * sizeof(short)); rec->used = cpu_to_le32(used + asize); mi->dirty = true; return attr; } /* * mi_remove_attr - Remove the attribute from record. * * NOTE: The source attr will point to next attribute. 
*/ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, struct ATTRIB *attr) { struct MFT_REC *rec = mi->mrec; u32 aoff = PtrOffset(rec, attr); u32 used = le32_to_cpu(rec->used); u32 asize = le32_to_cpu(attr->size); if (aoff + asize > used) return false; if (ni && is_attr_indexed(attr) && attr->type == ATTR_NAME) { u16 links = le16_to_cpu(ni->mi.mrec->hard_links); if (!links) { /* minor error. Not critical. */ } else { ni->mi.mrec->hard_links = cpu_to_le16(links - 1); ni->mi.dirty = true; } } used -= asize; memmove(attr, Add2Ptr(attr, asize), used - aoff); rec->used = cpu_to_le32(used); mi->dirty = true; return true; } /* bytes = "new attribute size" - "old attribute size" */ bool mi_resize_attr(struct mft_inode *mi, struct ATTRIB *attr, int bytes) { struct MFT_REC *rec = mi->mrec; u32 aoff = PtrOffset(rec, attr); u32 total, used = le32_to_cpu(rec->used); u32 nsize, asize = le32_to_cpu(attr->size); u32 rsize = le32_to_cpu(attr->res.data_size); int tail = (int)(used - aoff - asize); int dsize; char *next; if (tail < 0 || aoff >= used) return false; if (!bytes) return true; total = le32_to_cpu(rec->total); next = Add2Ptr(attr, asize); if (bytes > 0) { dsize = ALIGN(bytes, 8); if (used + dsize > total) return false; nsize = asize + dsize; /* Move tail */ memmove(next + dsize, next, tail); memset(next, 0, dsize); used += dsize; rsize += dsize; } else { dsize = ALIGN(-bytes, 8); if (dsize > asize) return false; nsize = asize - dsize; memmove(next - dsize, next, tail); used -= dsize; rsize -= dsize; } rec->used = cpu_to_le32(used); attr->size = cpu_to_le32(nsize); if (!attr->non_res) attr->res.data_size = cpu_to_le32(rsize); mi->dirty = true; return true; } /* * Pack runs in MFT record. * If failed record is not changed. */ int mi_pack_runs(struct mft_inode *mi, struct ATTRIB *attr, struct runs_tree *run, CLST len) { int err = 0; struct ntfs_sb_info *sbi = mi->sbi; u32 new_run_size; CLST plen; struct MFT_REC *rec = mi->mrec; CLST svcn = le64_to_cpu(attr->nres.svcn); u32 used = le32_to_cpu(rec->used); u32 aoff = PtrOffset(rec, attr); u32 asize = le32_to_cpu(attr->size); char *next = Add2Ptr(attr, asize); u16 run_off = le16_to_cpu(attr->nres.run_off); u32 run_size = asize - run_off; u32 tail = used - aoff - asize; u32 dsize = sbi->record_size - used; /* Make a maximum gap in current record. */ memmove(next + dsize, next, tail); /* Pack as much as possible. */ err = run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size + dsize, &plen); if (err < 0) { memmove(next, next + dsize, tail); return err; } new_run_size = ALIGN(err, 8); memmove(next + new_run_size - run_size, next + dsize, tail); attr->size = cpu_to_le32(asize + new_run_size - run_size); attr->nres.evcn = cpu_to_le64(svcn + plen - 1); rec->used = cpu_to_le32(used + new_run_size - run_size); mi->dirty = true; return 0; } |
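/*
 * Illustrative sketch (not the on-disk NTFS layout): mi_resize_attr() and
 * mi_pack_runs() both make room inside a fixed-size MFT record by sliding
 * the bytes behind the attribute ("the tail") with one memmove() and then
 * fixing up the used counter. The standalone model below shows just that
 * move on plain byte buffers; the real code additionally 8-byte-aligns the
 * delta and updates attr->size / res.data_size. Names here are invented.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define REC_TOTAL 64	/* stand-in for sbi->record_size */

/* Resize the region [aoff, aoff + asize) inside rec[0..*used) by 'bytes'
 * (positive grows, negative shrinks), moving the tail the way
 * mi_resize_attr() does. Returns 0 on success, -1 on overflow/underflow. */
static int resize_region(unsigned char *rec, unsigned *used,
			 unsigned aoff, unsigned asize, int bytes)
{
	unsigned tail = *used - aoff - asize;
	unsigned char *next = rec + aoff + asize;

	if (bytes > 0) {
		if (*used + bytes > REC_TOTAL)
			return -1;
		memmove(next + bytes, next, tail);	/* open a gap */
		memset(next, 0, bytes);			/* zero the new space */
		*used += bytes;
	} else if (bytes < 0) {
		if ((unsigned)-bytes > asize)
			return -1;
		memmove(next + bytes, next, tail);	/* close the gap */
		*used += bytes;
	}
	return 0;
}

int main(void)
{
	unsigned char rec[REC_TOTAL] = "AAAA" "BBBB" "CCCC";
	unsigned used = 12;

	assert(resize_region(rec, &used, 4, 4, 4) == 0);	/* grow "BBBB" by 4 */
	assert(used == 16 && !memcmp(rec + 12, "CCCC", 4));	/* tail moved intact */
	printf("tail moved, used=%u\n", used);
	return 0;
}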
// SPDX-License-Identifier: GPL-2.0+
/*
 * drivers/usb/class/usbtmc.c - USB Test & Measurement class driver
 *
 * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany
 * Copyright (C) 2008 Novell, Inc.
 * Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de>
 * Copyright (C) 2018 IVI Foundation, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/mutex.h>
#include <linux/usb.h>
#include <linux/compat.h>
#include <linux/usb/tmc.h>

/* Increment API VERSION when changing tmc.h with new flags or ioctls
 * or when changing a significant behavior of the driver.
 */
#define USBTMC_API_VERSION (3)

#define USBTMC_HEADER_SIZE	12
#define USBTMC_MINOR_BASE	176

/* Minimum USB timeout (in milliseconds) */
#define USBTMC_MIN_TIMEOUT	100
/* Default USB timeout (in milliseconds) */
#define USBTMC_TIMEOUT		5000

/* Max number of urbs used in write transfers */
#define MAX_URBS_IN_FLIGHT	16
/* I/O buffer size used in generic read/write functions */
#define USBTMC_BUFSIZE		(4096)

/*
 * Maximum number of read cycles to empty bulk in endpoint during CLEAR and
 * ABORT_BULK_IN requests. Ends the loop if (for whatever reason) a short
 * packet is never read.
 */
#define USBTMC_MAX_READS_TO_CLEAR_BULK_IN	100

static const struct usb_device_id usbtmc_devices[] = {
	{ USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 0), },
	{ USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 1), },
	{ 0, } /* terminating entry */
};
MODULE_DEVICE_TABLE(usb, usbtmc_devices);

/*
 * This structure is the capabilities for the device
 * See section 4.2.1.8 of the USBTMC specification,
 * and section 4.2.2 of the USBTMC usb488 subclass
 * specification for details.
 */
struct usbtmc_dev_capabilities {
	__u8 interface_capabilities;
	__u8 device_capabilities;
	__u8 usb488_interface_capabilities;
	__u8 usb488_device_capabilities;
};

/* This structure holds private data for each USBTMC device. One copy is
 * allocated for each USBTMC device in the driver's probe function.
*/ struct usbtmc_device_data { const struct usb_device_id *id; struct usb_device *usb_dev; struct usb_interface *intf; struct list_head file_list; unsigned int bulk_in; unsigned int bulk_out; u8 bTag; u8 bTag_last_write; /* needed for abort */ u8 bTag_last_read; /* needed for abort */ /* packet size of IN bulk */ u16 wMaxPacketSize; /* data for interrupt in endpoint handling */ u8 bNotify1; u8 bNotify2; u16 ifnum; u8 iin_bTag; u8 *iin_buffer; atomic_t iin_data_valid; unsigned int iin_ep; int iin_ep_present; int iin_interval; struct urb *iin_urb; u16 iin_wMaxPacketSize; /* coalesced usb488_caps from usbtmc_dev_capabilities */ __u8 usb488_caps; bool zombie; /* fd of disconnected device */ struct usbtmc_dev_capabilities capabilities; struct kref kref; struct mutex io_mutex; /* only one i/o function running at a time */ wait_queue_head_t waitq; struct fasync_struct *fasync; spinlock_t dev_lock; /* lock for file_list */ }; #define to_usbtmc_data(d) container_of(d, struct usbtmc_device_data, kref) /* * This structure holds private data for each USBTMC file handle. */ struct usbtmc_file_data { struct usbtmc_device_data *data; struct list_head file_elem; u32 timeout; u8 srq_byte; atomic_t srq_asserted; atomic_t closing; u8 bmTransferAttributes; /* member of DEV_DEP_MSG_IN */ u8 eom_val; u8 term_char; bool term_char_enabled; bool auto_abort; spinlock_t err_lock; /* lock for errors */ struct usb_anchor submitted; /* data for generic_write */ struct semaphore limit_write_sem; u32 out_transfer_size; int out_status; /* data for generic_read */ u32 in_transfer_size; int in_status; int in_urbs_used; struct usb_anchor in_anchor; wait_queue_head_t wait_bulk_in; }; /* Forward declarations */ static struct usb_driver usbtmc_driver; static void usbtmc_draw_down(struct usbtmc_file_data *file_data); static void usbtmc_delete(struct kref *kref) { struct usbtmc_device_data *data = to_usbtmc_data(kref); usb_put_dev(data->usb_dev); kfree(data); } static int usbtmc_open(struct inode *inode, struct file *filp) { struct usb_interface *intf; struct usbtmc_device_data *data; struct usbtmc_file_data *file_data; intf = usb_find_interface(&usbtmc_driver, iminor(inode)); if (!intf) { pr_err("can not find device for minor %d", iminor(inode)); return -ENODEV; } file_data = kzalloc(sizeof(*file_data), GFP_KERNEL); if (!file_data) return -ENOMEM; spin_lock_init(&file_data->err_lock); sema_init(&file_data->limit_write_sem, MAX_URBS_IN_FLIGHT); init_usb_anchor(&file_data->submitted); init_usb_anchor(&file_data->in_anchor); init_waitqueue_head(&file_data->wait_bulk_in); data = usb_get_intfdata(intf); /* Protect reference to data from file structure until release */ kref_get(&data->kref); mutex_lock(&data->io_mutex); file_data->data = data; atomic_set(&file_data->closing, 0); file_data->timeout = USBTMC_TIMEOUT; file_data->term_char = '\n'; file_data->term_char_enabled = 0; file_data->auto_abort = 0; file_data->eom_val = 1; INIT_LIST_HEAD(&file_data->file_elem); spin_lock_irq(&data->dev_lock); list_add_tail(&file_data->file_elem, &data->file_list); spin_unlock_irq(&data->dev_lock); mutex_unlock(&data->io_mutex); /* Store pointer in file structure's private data field */ filp->private_data = file_data; return 0; } /* * usbtmc_flush - called before file handle is closed */ static int usbtmc_flush(struct file *file, fl_owner_t id) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; file_data = file->private_data; if (file_data == NULL) return -ENODEV; atomic_set(&file_data->closing, 1); data = file_data->data; /* 
wait for io to stop */ mutex_lock(&data->io_mutex); usbtmc_draw_down(file_data); spin_lock_irq(&file_data->err_lock); file_data->in_status = 0; file_data->in_transfer_size = 0; file_data->in_urbs_used = 0; file_data->out_status = 0; file_data->out_transfer_size = 0; spin_unlock_irq(&file_data->err_lock); wake_up_interruptible_all(&data->waitq); mutex_unlock(&data->io_mutex); return 0; } static int usbtmc_release(struct inode *inode, struct file *file) { struct usbtmc_file_data *file_data = file->private_data; /* prevent IO _AND_ usbtmc_interrupt */ mutex_lock(&file_data->data->io_mutex); spin_lock_irq(&file_data->data->dev_lock); list_del(&file_data->file_elem); spin_unlock_irq(&file_data->data->dev_lock); mutex_unlock(&file_data->data->io_mutex); kref_put(&file_data->data->kref, usbtmc_delete); file_data->data = NULL; kfree(file_data); return 0; } static int usbtmc_ioctl_abort_bulk_in_tag(struct usbtmc_device_data *data, u8 tag) { u8 *buffer; struct device *dev; int rv; int n; int actual; dev = &data->intf->dev; buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_ABORT_BULK_IN, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, tag, data->bulk_in, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_ABORT_BULK_IN returned %x with tag %02x\n", buffer[0], buffer[1]); if (buffer[0] == USBTMC_STATUS_FAILED) { /* No transfer in progress and the Bulk-OUT FIFO is empty. */ rv = 0; goto exit; } if (buffer[0] == USBTMC_STATUS_TRANSFER_NOT_IN_PROGRESS) { /* The device returns this status if either: * - There is a transfer in progress, but the specified bTag * does not match. * - There is no transfer in progress, but the Bulk-OUT FIFO * is not empty. */ rv = -ENOMSG; goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_ABORT_BULK_IN returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_abort_bulk_in_status: dev_dbg(dev, "Reading from bulk in EP\n"); /* Data must be present. So use low timeout 300 ms */ actual = 0; rv = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, USBTMC_BUFSIZE, &actual, 300); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); n++; if (rv < 0) { dev_err(dev, "usb_bulk_msg returned %d\n", rv); if (rv != -ETIMEDOUT) goto exit; } if (actual == USBTMC_BUFSIZE) goto usbtmc_abort_bulk_in_status; if (n >= USBTMC_MAX_READS_TO_CLEAR_BULK_IN) { dev_err(dev, "Couldn't clear device buffer within %d cycles\n", USBTMC_MAX_READS_TO_CLEAR_BULK_IN); rv = -EPERM; goto exit; } rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_ABORT_BULK_IN_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, 0, data->bulk_in, buffer, 0x08, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) { rv = 0; goto exit; } if (buffer[0] != USBTMC_STATUS_PENDING) { dev_err(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]); rv = -EPERM; goto exit; } if ((buffer[1] & 1) > 0) { /* The device has 1 or more queued packets the Host can read */ goto usbtmc_abort_bulk_in_status; } /* The Host must send CHECK_ABORT_BULK_IN_STATUS at a later time. 
*/ rv = -EAGAIN; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_abort_bulk_in(struct usbtmc_device_data *data) { return usbtmc_ioctl_abort_bulk_in_tag(data, data->bTag_last_read); } static int usbtmc_ioctl_abort_bulk_out_tag(struct usbtmc_device_data *data, u8 tag) { struct device *dev; u8 *buffer; int rv; int n; dev = &data->intf->dev; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_ABORT_BULK_OUT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, tag, data->bulk_out, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_ABORT_BULK_OUT returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_ABORT_BULK_OUT returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_abort_bulk_out_check_status: /* do not stress device with subsequent requests */ msleep(50); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_ABORT_BULK_OUT_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, 0, data->bulk_out, buffer, 0x08, USB_CTRL_GET_TIMEOUT); n++; if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_ABORT_BULK_OUT returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) goto usbtmc_abort_bulk_out_clear_halt; if ((buffer[0] == USBTMC_STATUS_PENDING) && (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN)) goto usbtmc_abort_bulk_out_check_status; rv = -EPERM; goto exit; usbtmc_abort_bulk_out_clear_halt: rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data) { return usbtmc_ioctl_abort_bulk_out_tag(data, data->bTag_last_write); } static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u8 *buffer; u8 tag; int rv; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; atomic_set(&data->iin_data_valid, 0); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC488_REQUEST_READ_STATUS_BYTE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, data->iin_bTag, data->ifnum, buffer, 0x03, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "stb usb_control_msg returned %d\n", rv); goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "control status returned %x\n", buffer[0]); rv = -EIO; goto exit; } if (data->iin_ep_present) { rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, file_data->timeout); if (rv < 0) { dev_dbg(dev, "wait interrupted %d\n", rv); goto exit; } if (rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; } tag = data->bNotify1 & 0x7f; if (tag != data->iin_bTag) { dev_err(dev, "expected bTag %x got %x\n", data->iin_bTag, tag); } *stb = data->bNotify2; } else { *stb = buffer[2]; } dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); exit: /* bump interrupt bTag */ data->iin_bTag += 1; if (data->iin_bTag > 127) /* 1 is for SRQ see USBTMC-USB488 subclass spec section 4.3.1 */ data->iin_bTag = 2; kfree(buffer); return rv; } static int 
usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data, void __user *arg) { int srq_asserted = 0; __u8 stb; int rv; rv = usbtmc_get_stb(file_data, &stb); if (rv > 0) { srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); if (srq_asserted) stb |= 0x40; /* Set RQS bit */ rv = put_user(stb, (__u8 __user *)arg); } return rv; } static int usbtmc_ioctl_get_srq_stb(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; int srq_asserted = 0; __u8 stb = 0; int rv; spin_lock_irq(&data->dev_lock); srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); if (srq_asserted) { stb = file_data->srq_byte; spin_unlock_irq(&data->dev_lock); rv = put_user(stb, (__u8 __user *)arg); } else { spin_unlock_irq(&data->dev_lock); rv = -ENOMSG; } dev_dbg(dev, "stb:0x%02x with srq received %d\n", (unsigned int)stb, rv); return rv; } static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, __u32 __user *arg) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; int rv; u32 timeout; unsigned long expire; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); return -EFAULT; } if (get_user(timeout, arg)) return -EFAULT; expire = msecs_to_jiffies(timeout); mutex_unlock(&data->io_mutex); rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&file_data->srq_asserted) != 0 || atomic_read(&file_data->closing), expire); mutex_lock(&data->io_mutex); /* Note! disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) rv = -ENODEV; if (rv < 0) { /* dev can be invalid now! */ pr_debug("%s - wait interrupted %d\n", __func__, rv); return rv; } if (rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } dev_dbg(dev, "%s - srq asserted\n", __func__); return 0; } static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data, void __user *arg, unsigned int cmd) { struct device *dev = &data->intf->dev; __u8 val; u8 *buffer; u16 wValue; int rv; if (!(data->usb488_caps & USBTMC488_CAPABILITY_SIMPLE)) return -EINVAL; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; if (cmd == USBTMC488_REQUEST_REN_CONTROL) { rv = copy_from_user(&val, arg, sizeof(val)); if (rv) { rv = -EFAULT; goto exit; } wValue = val ? 1 : 0; } else { wValue = 0; } rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), cmd, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, wValue, data->ifnum, buffer, 0x01, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "simple usb_control_msg failed %d\n", rv); goto exit; } else if (rv != 1) { dev_warn(dev, "simple usb_control_msg returned %d\n", rv); rv = -EIO; goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "simple control status returned %x\n", buffer[0]); rv = -EIO; goto exit; } rv = 0; exit: kfree(buffer); return rv; } /* * Sends a TRIGGER Bulk-OUT command message * See the USBTMC-USB488 specification, Table 2. * * Also updates bTag_last_write. 
*/ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data) { struct usbtmc_device_data *data = file_data->data; int retval; u8 *buffer; int actual; buffer = kzalloc(USBTMC_HEADER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; buffer[0] = 128; buffer[1] = data->bTag; buffer[2] = ~data->bTag; retval = usb_bulk_msg(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, &actual, file_data->timeout); /* Store bTag (in case we need to abort) */ data->bTag_last_write = data->bTag; /* Increment bTag -- and increment again if zero */ data->bTag++; if (!data->bTag) data->bTag++; kfree(buffer); if (retval < 0) { dev_err(&data->intf->dev, "%s returned %d\n", __func__, retval); return retval; } return 0; } static struct urb *usbtmc_create_urb(void) { const size_t bufsize = USBTMC_BUFSIZE; u8 *dmabuf = NULL; struct urb *urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return NULL; dmabuf = kzalloc(bufsize, GFP_KERNEL); if (!dmabuf) { usb_free_urb(urb); return NULL; } urb->transfer_buffer = dmabuf; urb->transfer_buffer_length = bufsize; urb->transfer_flags |= URB_FREE_BUFFER; return urb; } static void usbtmc_read_bulk_cb(struct urb *urb) { struct usbtmc_file_data *file_data = urb->context; int status = urb->status; unsigned long flags; /* sync/async unlink faults aren't errors */ if (status) { if (!(/* status == -ENOENT || */ status == -ECONNRESET || status == -EREMOTEIO || /* Short packet */ status == -ESHUTDOWN)) dev_err(&file_data->data->intf->dev, "%s - nonzero read bulk status received: %d\n", __func__, status); spin_lock_irqsave(&file_data->err_lock, flags); if (!file_data->in_status) file_data->in_status = status; spin_unlock_irqrestore(&file_data->err_lock, flags); } spin_lock_irqsave(&file_data->err_lock, flags); file_data->in_transfer_size += urb->actual_length; dev_dbg(&file_data->data->intf->dev, "%s - total size: %u current: %d status: %d\n", __func__, file_data->in_transfer_size, urb->actual_length, status); spin_unlock_irqrestore(&file_data->err_lock, flags); usb_anchor_urb(urb, &file_data->in_anchor); wake_up_interruptible(&file_data->wait_bulk_in); wake_up_interruptible(&file_data->data->waitq); } static inline bool usbtmc_do_transfer(struct usbtmc_file_data *file_data) { bool data_or_error; spin_lock_irq(&file_data->err_lock); data_or_error = !usb_anchor_empty(&file_data->in_anchor) || file_data->in_status; spin_unlock_irq(&file_data->err_lock); dev_dbg(&file_data->data->intf->dev, "%s: returns %d\n", __func__, data_or_error); return data_or_error; } static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, void __user *user_buffer, u32 transfer_size, u32 *transferred, u32 flags) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u32 done = 0; u32 remaining; const u32 bufsize = USBTMC_BUFSIZE; int retval = 0; u32 max_transfer_size; unsigned long expire; int bufcount = 1; int again = 0; /* mutex already locked */ *transferred = done; max_transfer_size = transfer_size; if (flags & USBTMC_FLAG_IGNORE_TRAILER) { /* The device may send extra alignment bytes (up to * wMaxPacketSize – 1) to avoid sending a zero-length * packet */ remaining = transfer_size; if ((max_transfer_size % data->wMaxPacketSize) == 0) max_transfer_size += (data->wMaxPacketSize - 1); } else { /* round down to bufsize to avoid truncated data left */ if (max_transfer_size > bufsize) { max_transfer_size = roundup(max_transfer_size + 1 - bufsize, bufsize); } remaining = max_transfer_size; } 
spin_lock_irq(&file_data->err_lock); if (file_data->in_status) { /* return the very first error */ retval = file_data->in_status; spin_unlock_irq(&file_data->err_lock); goto error; } if (flags & USBTMC_FLAG_ASYNC) { if (usb_anchor_empty(&file_data->in_anchor)) again = 1; if (file_data->in_urbs_used == 0) { file_data->in_transfer_size = 0; file_data->in_status = 0; } } else { file_data->in_transfer_size = 0; file_data->in_status = 0; } if (max_transfer_size == 0) { bufcount = 0; } else { bufcount = roundup(max_transfer_size, bufsize) / bufsize; if (bufcount > file_data->in_urbs_used) bufcount -= file_data->in_urbs_used; else bufcount = 0; if (bufcount + file_data->in_urbs_used > MAX_URBS_IN_FLIGHT) { bufcount = MAX_URBS_IN_FLIGHT - file_data->in_urbs_used; } } spin_unlock_irq(&file_data->err_lock); dev_dbg(dev, "%s: requested=%u flags=0x%X size=%u bufs=%d used=%d\n", __func__, transfer_size, flags, max_transfer_size, bufcount, file_data->in_urbs_used); while (bufcount > 0) { u8 *dmabuf = NULL; struct urb *urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; goto error; } dmabuf = urb->transfer_buffer; usb_fill_bulk_urb(urb, data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), dmabuf, bufsize, usbtmc_read_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); /* urb is anchored. We can release our reference. */ usb_free_urb(urb); if (unlikely(retval)) { usb_unanchor_urb(urb); goto error; } file_data->in_urbs_used++; bufcount--; } if (again) { dev_dbg(dev, "%s: ret=again\n", __func__); return -EAGAIN; } if (user_buffer == NULL) return -EINVAL; expire = msecs_to_jiffies(file_data->timeout); while (max_transfer_size > 0) { u32 this_part; struct urb *urb = NULL; if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); retval = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); dev_dbg(dev, "%s: wait returned %d\n", __func__, retval); if (retval <= 0) { if (retval == 0) retval = -ETIMEDOUT; goto error; } } urb = usb_get_from_anchor(&file_data->in_anchor); if (!urb) { if (!(flags & USBTMC_FLAG_ASYNC)) { /* synchronous case: must not happen */ retval = -EFAULT; goto error; } /* asynchronous case: ready, do not block or wait */ *transferred = done; dev_dbg(dev, "%s: (async) done=%u ret=0\n", __func__, done); return 0; } file_data->in_urbs_used--; if (max_transfer_size > urb->actual_length) max_transfer_size -= urb->actual_length; else max_transfer_size = 0; if (remaining > urb->actual_length) this_part = urb->actual_length; else this_part = remaining; print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, urb->transfer_buffer, urb->actual_length, true); if (copy_to_user(user_buffer + done, urb->transfer_buffer, this_part)) { usb_free_urb(urb); retval = -EFAULT; goto error; } remaining -= this_part; done += this_part; spin_lock_irq(&file_data->err_lock); if (urb->status) { /* return the very first error */ retval = file_data->in_status; spin_unlock_irq(&file_data->err_lock); usb_free_urb(urb); goto error; } spin_unlock_irq(&file_data->err_lock); if (urb->actual_length < bufsize) { /* short packet or ZLP received => ready */ usb_free_urb(urb); retval = 1; break; } if (!(flags & USBTMC_FLAG_ASYNC) && max_transfer_size > (bufsize * file_data->in_urbs_used)) { /* resubmit, since other buffers still not enough */ usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); 
usb_free_urb(urb); goto error; } file_data->in_urbs_used++; } usb_free_urb(urb); retval = 0; } error: *transferred = done; dev_dbg(dev, "%s: before kill\n", __func__); /* Attention: killing urbs can take long time (2 ms) */ usb_kill_anchored_urbs(&file_data->submitted); dev_dbg(dev, "%s: after kill\n", __func__); usb_scuttle_anchored_urbs(&file_data->in_anchor); file_data->in_urbs_used = 0; file_data->in_status = 0; /* no spinlock needed here */ dev_dbg(dev, "%s: done=%u ret=%d\n", __func__, done, retval); return retval; } static ssize_t usbtmc_ioctl_generic_read(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_message msg; ssize_t retval = 0; /* mutex already locked */ if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message))) return -EFAULT; retval = usbtmc_generic_read(file_data, msg.message, msg.transfer_size, &msg.transferred, msg.flags); if (put_user(msg.transferred, &((struct usbtmc_message __user *)arg)->transferred)) return -EFAULT; return retval; } static void usbtmc_write_bulk_cb(struct urb *urb) { struct usbtmc_file_data *file_data = urb->context; int wakeup = 0; unsigned long flags; spin_lock_irqsave(&file_data->err_lock, flags); file_data->out_transfer_size += urb->actual_length; /* sync/async unlink faults aren't errors */ if (urb->status) { if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) dev_err(&file_data->data->intf->dev, "%s - nonzero write bulk status received: %d\n", __func__, urb->status); if (!file_data->out_status) { file_data->out_status = urb->status; wakeup = 1; } } spin_unlock_irqrestore(&file_data->err_lock, flags); dev_dbg(&file_data->data->intf->dev, "%s - write bulk total size: %u\n", __func__, file_data->out_transfer_size); up(&file_data->limit_write_sem); if (usb_anchor_empty(&file_data->submitted) || wakeup) wake_up_interruptible(&file_data->data->waitq); } static ssize_t usbtmc_generic_write(struct usbtmc_file_data *file_data, const void __user *user_buffer, u32 transfer_size, u32 *transferred, u32 flags) { struct usbtmc_device_data *data = file_data->data; struct device *dev; u32 done = 0; u32 remaining; unsigned long expire; const u32 bufsize = USBTMC_BUFSIZE; struct urb *urb = NULL; int retval = 0; u32 timeout; *transferred = 0; /* Get pointer to private data structure */ dev = &data->intf->dev; dev_dbg(dev, "%s: size=%u flags=0x%X sema=%u\n", __func__, transfer_size, flags, file_data->limit_write_sem.count); if (flags & USBTMC_FLAG_APPEND) { spin_lock_irq(&file_data->err_lock); retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (retval < 0) return retval; } else { spin_lock_irq(&file_data->err_lock); file_data->out_transfer_size = 0; file_data->out_status = 0; spin_unlock_irq(&file_data->err_lock); } remaining = transfer_size; if (remaining > INT_MAX) remaining = INT_MAX; timeout = file_data->timeout; expire = msecs_to_jiffies(timeout); while (remaining > 0) { u32 this_part, aligned; u8 *buffer = NULL; if (flags & USBTMC_FLAG_ASYNC) { if (down_trylock(&file_data->limit_write_sem)) { retval = (done)?(0):(-EAGAIN); goto exit; } } else { retval = down_timeout(&file_data->limit_write_sem, expire); if (retval < 0) { retval = -ETIMEDOUT; goto error; } } spin_lock_irq(&file_data->err_lock); retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (retval < 0) { up(&file_data->limit_write_sem); goto error; } /* prepare next urb to send */ urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; up(&file_data->limit_write_sem); goto error; } buffer = 
urb->transfer_buffer; if (remaining > bufsize) this_part = bufsize; else this_part = remaining; if (copy_from_user(buffer, user_buffer + done, this_part)) { retval = -EFAULT; up(&file_data->limit_write_sem); goto error; } print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, this_part, true); /* fill bulk with 32 bit alignment to meet USBTMC specification * (size + 3 & ~3) rounds up and simplifies user code */ aligned = (this_part + 3) & ~3; dev_dbg(dev, "write(size:%u align:%u done:%u)\n", (unsigned int)this_part, (unsigned int)aligned, (unsigned int)done); usb_fill_bulk_urb(urb, data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), urb->transfer_buffer, aligned, usbtmc_write_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); up(&file_data->limit_write_sem); goto error; } usb_free_urb(urb); urb = NULL; /* urb will be finally released by usb driver */ remaining -= this_part; done += this_part; } /* All urbs are on the fly */ if (!(flags & USBTMC_FLAG_ASYNC)) { if (!usb_wait_anchor_empty_timeout(&file_data->submitted, timeout)) { retval = -ETIMEDOUT; goto error; } } retval = 0; goto exit; error: usb_kill_anchored_urbs(&file_data->submitted); exit: usb_free_urb(urb); spin_lock_irq(&file_data->err_lock); if (!(flags & USBTMC_FLAG_ASYNC)) done = file_data->out_transfer_size; if (!retval && file_data->out_status) retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); *transferred = done; dev_dbg(dev, "%s: done=%u, retval=%d, urbstat=%d\n", __func__, done, retval, file_data->out_status); return retval; } static ssize_t usbtmc_ioctl_generic_write(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_message msg; ssize_t retval = 0; /* mutex already locked */ if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message))) return -EFAULT; retval = usbtmc_generic_write(file_data, msg.message, msg.transfer_size, &msg.transferred, msg.flags); if (put_user(msg.transferred, &((struct usbtmc_message __user *)arg)->transferred)) return -EFAULT; return retval; } /* * Get the generic write result */ static ssize_t usbtmc_ioctl_write_result(struct usbtmc_file_data *file_data, void __user *arg) { u32 transferred; int retval; spin_lock_irq(&file_data->err_lock); transferred = file_data->out_transfer_size; retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (put_user(transferred, (__u32 __user *)arg)) return -EFAULT; return retval; } /* * Sends a REQUEST_DEV_DEP_MSG_IN message on the Bulk-OUT endpoint. * @transfer_size: number of bytes to request from the device. * * See the USBTMC specification, Table 4. * * Also updates bTag_last_write. */ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data, u32 transfer_size) { struct usbtmc_device_data *data = file_data->data; int retval; u8 *buffer; int actual; buffer = kmalloc(USBTMC_HEADER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; /* Setup IO buffer for REQUEST_DEV_DEP_MSG_IN message * Refer to class specs for details */ buffer[0] = 2; buffer[1] = data->bTag; buffer[2] = ~data->bTag; buffer[3] = 0; /* Reserved */ buffer[4] = transfer_size >> 0; buffer[5] = transfer_size >> 8; buffer[6] = transfer_size >> 16; buffer[7] = transfer_size >> 24; buffer[8] = file_data->term_char_enabled * 2; /* Use term character? 
*/ buffer[9] = file_data->term_char; buffer[10] = 0; /* Reserved */ buffer[11] = 0; /* Reserved */ /* Send bulk URB */ retval = usb_bulk_msg(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, &actual, file_data->timeout); /* Store bTag (in case we need to abort) */ data->bTag_last_write = data->bTag; /* Increment bTag -- and increment again if zero */ data->bTag++; if (!data->bTag) data->bTag++; kfree(buffer); if (retval < 0) dev_err(&data->intf->dev, "%s returned %d\n", __func__, retval); return retval; } static ssize_t usbtmc_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; struct device *dev; const u32 bufsize = USBTMC_BUFSIZE; u32 n_characters; u8 *buffer; int actual; u32 done = 0; u32 remaining; int retval; /* Get pointer to private data structure */ file_data = filp->private_data; data = file_data->data; dev = &data->intf->dev; buffer = kmalloc(bufsize, GFP_KERNEL); if (!buffer) return -ENOMEM; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto exit; } if (count > INT_MAX) count = INT_MAX; dev_dbg(dev, "%s(count:%zu)\n", __func__, count); retval = send_request_dev_dep_msg_in(file_data, count); if (retval < 0) { if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_out(data); goto exit; } /* Loop until we have fetched everything we requested */ remaining = count; actual = 0; /* Send bulk URB */ retval = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, bufsize, &actual, file_data->timeout); dev_dbg(dev, "%s: bulk_msg retval(%u), actual(%d)\n", __func__, retval, actual); /* Store bTag (in case we need to abort) */ data->bTag_last_read = data->bTag; if (retval < 0) { if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } /* Sanity checks for the header */ if (actual < USBTMC_HEADER_SIZE) { dev_err(dev, "Device sent too small first packet: %u < %u\n", actual, USBTMC_HEADER_SIZE); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } if (buffer[0] != 2) { dev_err(dev, "Device sent reply with wrong MsgID: %u != 2\n", buffer[0]); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } if (buffer[1] != data->bTag_last_write) { dev_err(dev, "Device sent reply with wrong bTag: %u != %u\n", buffer[1], data->bTag_last_write); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } /* How many characters did the instrument send? 
*/ n_characters = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); file_data->bmTransferAttributes = buffer[8]; dev_dbg(dev, "Bulk-IN header: N_characters(%u), bTransAttr(%u)\n", n_characters, buffer[8]); if (n_characters > remaining) { dev_err(dev, "Device wants to return more data than requested: %u > %zu\n", n_characters, count); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); remaining = n_characters; /* Remove the USBTMC header */ actual -= USBTMC_HEADER_SIZE; /* Remove padding if it exists */ if (actual > remaining) actual = remaining; remaining -= actual; /* Copy buffer to user space */ if (copy_to_user(buf, &buffer[USBTMC_HEADER_SIZE], actual)) { /* There must have been an addressing problem */ retval = -EFAULT; goto exit; } if ((actual + USBTMC_HEADER_SIZE) == bufsize) { retval = usbtmc_generic_read(file_data, buf + actual, remaining, &done, USBTMC_FLAG_IGNORE_TRAILER); if (retval < 0) goto exit; } done += actual; /* Update file position value */ *f_pos = *f_pos + done; retval = done; exit: mutex_unlock(&data->io_mutex); kfree(buffer); return retval; } static ssize_t usbtmc_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; struct urb *urb = NULL; ssize_t retval = 0; u8 *buffer; u32 remaining, done; u32 transfersize, aligned, buflen; file_data = filp->private_data; data = file_data->data; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto exit; } done = 0; spin_lock_irq(&file_data->err_lock); file_data->out_transfer_size = 0; file_data->out_status = 0; spin_unlock_irq(&file_data->err_lock); if (!count) goto exit; if (down_trylock(&file_data->limit_write_sem)) { /* previous calls were async */ retval = -EBUSY; goto exit; } urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; up(&file_data->limit_write_sem); goto exit; } buffer = urb->transfer_buffer; buflen = urb->transfer_buffer_length; if (count > INT_MAX) { transfersize = INT_MAX; buffer[8] = 0; } else { transfersize = count; buffer[8] = file_data->eom_val; } /* Setup IO buffer for DEV_DEP_MSG_OUT message */ buffer[0] = 1; buffer[1] = data->bTag; buffer[2] = ~data->bTag; buffer[3] = 0; /* Reserved */ buffer[4] = transfersize >> 0; buffer[5] = transfersize >> 8; buffer[6] = transfersize >> 16; buffer[7] = transfersize >> 24; /* buffer[8] is set above... 
*/ buffer[9] = 0; /* Reserved */ buffer[10] = 0; /* Reserved */ buffer[11] = 0; /* Reserved */ remaining = transfersize; if (transfersize + USBTMC_HEADER_SIZE > buflen) { transfersize = buflen - USBTMC_HEADER_SIZE; aligned = buflen; } else { aligned = (transfersize + (USBTMC_HEADER_SIZE + 3)) & ~3; } if (copy_from_user(&buffer[USBTMC_HEADER_SIZE], buf, transfersize)) { retval = -EFAULT; up(&file_data->limit_write_sem); goto exit; } dev_dbg(&data->intf->dev, "%s(size:%u align:%u)\n", __func__, (unsigned int)transfersize, (unsigned int)aligned); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, aligned, true); usb_fill_bulk_urb(urb, data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), urb->transfer_buffer, aligned, usbtmc_write_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); up(&file_data->limit_write_sem); goto exit; } remaining -= transfersize; data->bTag_last_write = data->bTag; data->bTag++; if (!data->bTag) data->bTag++; /* call generic_write even when remaining = 0 */ retval = usbtmc_generic_write(file_data, buf + transfersize, remaining, &done, USBTMC_FLAG_APPEND); /* truncate alignment bytes */ if (done > remaining) done = remaining; /*add size of first urb*/ done += transfersize; if (retval < 0) { usb_kill_anchored_urbs(&file_data->submitted); dev_err(&data->intf->dev, "Unable to send data, error %d\n", (int)retval); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_out(data); goto exit; } retval = done; exit: usb_free_urb(urb); mutex_unlock(&data->io_mutex); return retval; } static int usbtmc_ioctl_clear(struct usbtmc_device_data *data) { struct device *dev; u8 *buffer; int rv; int n; int actual = 0; dev = &data->intf->dev; dev_dbg(dev, "Sending INITIATE_CLEAR request\n"); buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_CLEAR, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 1, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_CLEAR returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_CLEAR returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_clear_check_status: dev_dbg(dev, "Sending CHECK_CLEAR_STATUS request\n"); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_CLEAR_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_CLEAR_STATUS returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) goto usbtmc_clear_bulk_out_halt; if (buffer[0] != USBTMC_STATUS_PENDING) { dev_err(dev, "CHECK_CLEAR_STATUS returned %x\n", buffer[0]); rv = -EPERM; goto exit; } if ((buffer[1] & 1) != 0) { do { dev_dbg(dev, "Reading from bulk in EP\n"); actual = 0; rv = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, USBTMC_BUFSIZE, &actual, USB_CTRL_GET_TIMEOUT); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); n++; if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } } while ((actual == USBTMC_BUFSIZE) && (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN)); } else { /* do not stress device with subsequent requests */ msleep(50); n++; } if (n >= 
USBTMC_MAX_READS_TO_CLEAR_BULK_IN) { dev_err(dev, "Couldn't clear device buffer within %d cycles\n", USBTMC_MAX_READS_TO_CLEAR_BULK_IN); rv = -EPERM; goto exit; } goto usbtmc_clear_check_status; usbtmc_clear_bulk_out_halt: rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) { dev_err(dev, "usb_clear_halt returned %d\n", rv); goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_clear_out_halt(struct usbtmc_device_data *data) { int rv; rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv); return rv; } static int usbtmc_ioctl_clear_in_halt(struct usbtmc_device_data *data) { int rv; rv = usb_clear_halt(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in)); if (rv < 0) dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv); return rv; } static int usbtmc_ioctl_cancel_io(struct usbtmc_file_data *file_data) { spin_lock_irq(&file_data->err_lock); file_data->in_status = -ECANCELED; file_data->out_status = -ECANCELED; spin_unlock_irq(&file_data->err_lock); usb_kill_anchored_urbs(&file_data->submitted); return 0; } static int usbtmc_ioctl_cleanup_io(struct usbtmc_file_data *file_data) { usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); spin_lock_irq(&file_data->err_lock); file_data->in_status = 0; file_data->in_transfer_size = 0; file_data->out_status = 0; file_data->out_transfer_size = 0; spin_unlock_irq(&file_data->err_lock); file_data->in_urbs_used = 0; return 0; } static int get_capabilities(struct usbtmc_device_data *data) { struct device *dev = &data->usb_dev->dev; char *buffer; int rv = 0; buffer = kmalloc(0x18, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_GET_CAPABILITIES, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 0x18, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto err_out; } dev_dbg(dev, "GET_CAPABILITIES returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "GET_CAPABILITIES returned %x\n", buffer[0]); rv = -EPERM; goto err_out; } dev_dbg(dev, "Interface capabilities are %x\n", buffer[4]); dev_dbg(dev, "Device capabilities are %x\n", buffer[5]); dev_dbg(dev, "USB488 interface capabilities are %x\n", buffer[14]); dev_dbg(dev, "USB488 device capabilities are %x\n", buffer[15]); data->capabilities.interface_capabilities = buffer[4]; data->capabilities.device_capabilities = buffer[5]; data->capabilities.usb488_interface_capabilities = buffer[14]; data->capabilities.usb488_device_capabilities = buffer[15]; data->usb488_caps = (buffer[14] & 0x07) | ((buffer[15] & 0x0f) << 4); rv = 0; err_out: kfree(buffer); return rv; } #define capability_attribute(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usbtmc_device_data *data = usb_get_intfdata(intf); \ \ return sprintf(buf, "%d\n", data->capabilities.name); \ } \ static DEVICE_ATTR_RO(name) capability_attribute(interface_capabilities); capability_attribute(device_capabilities); capability_attribute(usb488_interface_capabilities); capability_attribute(usb488_device_capabilities); static struct attribute *usbtmc_attrs[] = { &dev_attr_interface_capabilities.attr, &dev_attr_device_capabilities.attr, 
&dev_attr_usb488_interface_capabilities.attr, &dev_attr_usb488_device_capabilities.attr, NULL, }; ATTRIBUTE_GROUPS(usbtmc); static int usbtmc_ioctl_indicator_pulse(struct usbtmc_device_data *data) { struct device *dev; u8 *buffer; int rv; dev = &data->intf->dev; buffer = kmalloc(2, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INDICATOR_PULSE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 0x01, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INDICATOR_PULSE returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INDICATOR_PULSE returned %x\n", buffer[0]); rv = -EPERM; goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_request(struct usbtmc_device_data *data, void __user *arg) { struct device *dev = &data->intf->dev; struct usbtmc_ctrlrequest request; u8 *buffer = NULL; int rv; unsigned int is_in, pipe; unsigned long res; res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest)); if (res) return -EFAULT; if (request.req.wLength > USBTMC_BUFSIZE) return -EMSGSIZE; if (request.req.wLength == 0) /* Length-0 requests are never IN */ request.req.bRequestType &= ~USB_DIR_IN; is_in = request.req.bRequestType & USB_DIR_IN; if (request.req.wLength) { buffer = kmalloc(request.req.wLength, GFP_KERNEL); if (!buffer) return -ENOMEM; if (!is_in) { /* Send control data to device */ res = copy_from_user(buffer, request.data, request.req.wLength); if (res) { rv = -EFAULT; goto exit; } } } if (is_in) pipe = usb_rcvctrlpipe(data->usb_dev, 0); else pipe = usb_sndctrlpipe(data->usb_dev, 0); rv = usb_control_msg(data->usb_dev, pipe, request.req.bRequest, request.req.bRequestType, request.req.wValue, request.req.wIndex, buffer, request.req.wLength, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "%s failed %d\n", __func__, rv); goto exit; } if (rv && is_in) { /* Read control data from device */ res = copy_to_user(request.data, buffer, rv); if (res) rv = -EFAULT; } exit: kfree(buffer); return rv; } /* * Get the usb timeout value */ static int usbtmc_ioctl_get_timeout(struct usbtmc_file_data *file_data, void __user *arg) { u32 timeout; timeout = file_data->timeout; return put_user(timeout, (__u32 __user *)arg); } /* * Set the usb timeout value */ static int usbtmc_ioctl_set_timeout(struct usbtmc_file_data *file_data, void __user *arg) { u32 timeout; if (get_user(timeout, (__u32 __user *)arg)) return -EFAULT; /* Note that timeout = 0 means * MAX_SCHEDULE_TIMEOUT in usb_control_msg */ if (timeout < USBTMC_MIN_TIMEOUT) return -EINVAL; file_data->timeout = timeout; return 0; } /* * enables/disables sending EOM on write */ static int usbtmc_ioctl_eom_enable(struct usbtmc_file_data *file_data, void __user *arg) { u8 eom_enable; if (copy_from_user(&eom_enable, arg, sizeof(eom_enable))) return -EFAULT; if (eom_enable > 1) return -EINVAL; file_data->eom_val = eom_enable; return 0; } /* * Configure termination character for read() */ static int usbtmc_ioctl_config_termc(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_termchar termc; if (copy_from_user(&termc, arg, sizeof(termc))) return -EFAULT; if ((termc.term_char_enabled > 1) || (termc.term_char_enabled && !(file_data->data->capabilities.device_capabilities & 1))) return -EINVAL; file_data->term_char = termc.term_char; file_data->term_char_enabled = termc.term_char_enabled; return 0; } static long 
usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; int retval = -EBADRQC; __u8 tmp_byte; file_data = file->private_data; data = file_data->data; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto skip_io_on_zombie; } switch (cmd) { case USBTMC_IOCTL_CLEAR_OUT_HALT: retval = usbtmc_ioctl_clear_out_halt(data); break; case USBTMC_IOCTL_CLEAR_IN_HALT: retval = usbtmc_ioctl_clear_in_halt(data); break; case USBTMC_IOCTL_INDICATOR_PULSE: retval = usbtmc_ioctl_indicator_pulse(data); break; case USBTMC_IOCTL_CLEAR: retval = usbtmc_ioctl_clear(data); break; case USBTMC_IOCTL_ABORT_BULK_OUT: retval = usbtmc_ioctl_abort_bulk_out(data); break; case USBTMC_IOCTL_ABORT_BULK_IN: retval = usbtmc_ioctl_abort_bulk_in(data); break; case USBTMC_IOCTL_CTRL_REQUEST: retval = usbtmc_ioctl_request(data, (void __user *)arg); break; case USBTMC_IOCTL_GET_TIMEOUT: retval = usbtmc_ioctl_get_timeout(file_data, (void __user *)arg); break; case USBTMC_IOCTL_SET_TIMEOUT: retval = usbtmc_ioctl_set_timeout(file_data, (void __user *)arg); break; case USBTMC_IOCTL_EOM_ENABLE: retval = usbtmc_ioctl_eom_enable(file_data, (void __user *)arg); break; case USBTMC_IOCTL_CONFIG_TERMCHAR: retval = usbtmc_ioctl_config_termc(file_data, (void __user *)arg); break; case USBTMC_IOCTL_WRITE: retval = usbtmc_ioctl_generic_write(file_data, (void __user *)arg); break; case USBTMC_IOCTL_READ: retval = usbtmc_ioctl_generic_read(file_data, (void __user *)arg); break; case USBTMC_IOCTL_WRITE_RESULT: retval = usbtmc_ioctl_write_result(file_data, (void __user *)arg); break; case USBTMC_IOCTL_API_VERSION: retval = put_user(USBTMC_API_VERSION, (__u32 __user *)arg); break; case USBTMC488_IOCTL_GET_CAPS: retval = put_user(data->usb488_caps, (unsigned char __user *)arg); break; case USBTMC488_IOCTL_READ_STB: retval = usbtmc488_ioctl_read_stb(file_data, (void __user *)arg); break; case USBTMC488_IOCTL_REN_CONTROL: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_REN_CONTROL); break; case USBTMC488_IOCTL_GOTO_LOCAL: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_GOTO_LOCAL); break; case USBTMC488_IOCTL_LOCAL_LOCKOUT: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_LOCAL_LOCKOUT); break; case USBTMC488_IOCTL_TRIGGER: retval = usbtmc488_ioctl_trigger(file_data); break; case USBTMC488_IOCTL_WAIT_SRQ: retval = usbtmc488_ioctl_wait_srq(file_data, (__u32 __user *)arg); break; case USBTMC_IOCTL_MSG_IN_ATTR: retval = put_user(file_data->bmTransferAttributes, (__u8 __user *)arg); break; case USBTMC_IOCTL_AUTO_ABORT: retval = get_user(tmp_byte, (unsigned char __user *)arg); if (retval == 0) file_data->auto_abort = !!tmp_byte; break; case USBTMC_IOCTL_GET_STB: retval = usbtmc_get_stb(file_data, &tmp_byte); if (retval > 0) retval = put_user(tmp_byte, (__u8 __user *)arg); break; case USBTMC_IOCTL_GET_SRQ_STB: retval = usbtmc_ioctl_get_srq_stb(file_data, (void __user *)arg); break; case USBTMC_IOCTL_CANCEL_IO: retval = usbtmc_ioctl_cancel_io(file_data); break; case USBTMC_IOCTL_CLEANUP_IO: retval = usbtmc_ioctl_cleanup_io(file_data); break; } skip_io_on_zombie: mutex_unlock(&data->io_mutex); return retval; } static int usbtmc_fasync(int fd, struct file *file, int on) { struct usbtmc_file_data *file_data = file->private_data; return fasync_helper(fd, file, on, &file_data->data->fasync); } static __poll_t usbtmc_poll(struct file *file, poll_table *wait) { struct usbtmc_file_data 
*file_data = file->private_data; struct usbtmc_device_data *data = file_data->data; __poll_t mask; mutex_lock(&data->io_mutex); if (data->zombie) { mask = EPOLLHUP | EPOLLERR; goto no_poll; } poll_wait(file, &data->waitq, wait); /* Note that EPOLLPRI is now assigned to SRQ, and * EPOLLIN|EPOLLRDNORM to normal read data. */ mask = 0; if (atomic_read(&file_data->srq_asserted)) mask |= EPOLLPRI; /* Note that the anchor submitted includes all urbs for BULK IN * and OUT. So EPOLLOUT is signaled when BULK OUT is empty and * all BULK IN urbs are completed and moved to in_anchor. */ if (usb_anchor_empty(&file_data->submitted)) mask |= (EPOLLOUT | EPOLLWRNORM); if (!usb_anchor_empty(&file_data->in_anchor)) mask |= (EPOLLIN | EPOLLRDNORM); spin_lock_irq(&file_data->err_lock); if (file_data->in_status || file_data->out_status) mask |= EPOLLERR; spin_unlock_irq(&file_data->err_lock); dev_dbg(&data->intf->dev, "poll mask = %x\n", mask); no_poll: mutex_unlock(&data->io_mutex); return mask; } static const struct file_operations fops = { .owner = THIS_MODULE, .read = usbtmc_read, .write = usbtmc_write, .open = usbtmc_open, .release = usbtmc_release, .flush = usbtmc_flush, .unlocked_ioctl = usbtmc_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = usbtmc_fasync, .poll = usbtmc_poll, .llseek = default_llseek, }; static struct usb_class_driver usbtmc_class = { .name = "usbtmc%d", .fops = &fops, .minor_base = USBTMC_MINOR_BASE, }; static void usbtmc_interrupt(struct urb *urb) { struct usbtmc_device_data *data = urb->context; struct device *dev = &data->intf->dev; int status = urb->status; int rv; dev_dbg(&data->intf->dev, "int status: %d len %d\n", status, urb->actual_length); switch (status) { case 0: /* SUCCESS */ /* check for valid STB notification */ if (data->iin_buffer[0] > 0x81) { data->bNotify1 = data->iin_buffer[0]; data->bNotify2 = data->iin_buffer[1]; atomic_set(&data->iin_data_valid, 1); wake_up_interruptible(&data->waitq); goto exit; } /* check for SRQ notification */ if (data->iin_buffer[0] == 0x81) { unsigned long flags; struct list_head *elem; if (data->fasync) kill_fasync(&data->fasync, SIGIO, POLL_PRI); spin_lock_irqsave(&data->dev_lock, flags); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); file_data->srq_byte = data->iin_buffer[1]; atomic_set(&file_data->srq_asserted, 1); } spin_unlock_irqrestore(&data->dev_lock, flags); dev_dbg(dev, "srq received bTag %x stb %x\n", (unsigned int)data->iin_buffer[0], (unsigned int)data->iin_buffer[1]); wake_up_interruptible_all(&data->waitq); goto exit; } dev_warn(dev, "invalid notification: %x\n", data->iin_buffer[0]); break; case -EOVERFLOW: dev_err(dev, "overflow with length %d, actual length is %d\n", data->iin_wMaxPacketSize, urb->actual_length); fallthrough; default: /* urb terminated, clean up */ dev_dbg(dev, "urb terminated, status: %d\n", status); return; } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); if (rv) dev_err(dev, "usb_submit_urb failed: %d\n", rv); } static void usbtmc_free_int(struct usbtmc_device_data *data) { if (!data->iin_ep_present || !data->iin_urb) return; usb_kill_urb(data->iin_urb); kfree(data->iin_buffer); data->iin_buffer = NULL; usb_free_urb(data->iin_urb); data->iin_urb = NULL; kref_put(&data->kref, usbtmc_delete); } static int usbtmc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usbtmc_device_data *data; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in; 
int retcode; dev_dbg(&intf->dev, "%s called\n", __func__); data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->intf = intf; data->id = id; data->usb_dev = usb_get_dev(interface_to_usbdev(intf)); usb_set_intfdata(intf, data); kref_init(&data->kref); mutex_init(&data->io_mutex); init_waitqueue_head(&data->waitq); atomic_set(&data->iin_data_valid, 0); INIT_LIST_HEAD(&data->file_list); spin_lock_init(&data->dev_lock); data->zombie = 0; /* Initialize USBTMC bTag and other fields */ data->bTag = 1; /* 2 <= bTag <= 127 USBTMC-USB488 subclass specification 4.3.1 */ data->iin_bTag = 2; /* USBTMC devices have only one setting, so use that */ iface_desc = data->intf->cur_altsetting; data->ifnum = iface_desc->desc.bInterfaceNumber; /* Find bulk endpoints */ retcode = usb_find_common_endpoints(iface_desc, &bulk_in, &bulk_out, NULL, NULL); if (retcode) { dev_err(&intf->dev, "bulk endpoints not found\n"); goto err_put; } retcode = -EINVAL; data->bulk_in = bulk_in->bEndpointAddress; data->wMaxPacketSize = usb_endpoint_maxp(bulk_in); if (!data->wMaxPacketSize) goto err_put; dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", data->bulk_in); data->bulk_out = bulk_out->bEndpointAddress; dev_dbg(&intf->dev, "Found Bulk out endpoint at %u\n", data->bulk_out); /* Find int endpoint */ retcode = usb_find_int_in_endpoint(iface_desc, &int_in); if (!retcode) { data->iin_ep_present = 1; data->iin_ep = int_in->bEndpointAddress; data->iin_wMaxPacketSize = usb_endpoint_maxp(int_in); data->iin_interval = int_in->bInterval; dev_dbg(&intf->dev, "Found Int in endpoint at %u\n", data->iin_ep); } retcode = get_capabilities(data); if (retcode) dev_err(&intf->dev, "can't read capabilities\n"); if (data->iin_ep_present) { /* allocate int urb */ data->iin_urb = usb_alloc_urb(0, GFP_KERNEL); if (!data->iin_urb) { retcode = -ENOMEM; goto error_register; } /* Protect interrupt in endpoint data until iin_urb is freed */ kref_get(&data->kref); /* allocate buffer for interrupt in */ data->iin_buffer = kmalloc(data->iin_wMaxPacketSize, GFP_KERNEL); if (!data->iin_buffer) { retcode = -ENOMEM; goto error_register; } /* fill interrupt urb */ usb_fill_int_urb(data->iin_urb, data->usb_dev, usb_rcvintpipe(data->usb_dev, data->iin_ep), data->iin_buffer, data->iin_wMaxPacketSize, usbtmc_interrupt, data, data->iin_interval); retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL); if (retcode) { dev_err(&intf->dev, "Failed to submit iin_urb\n"); goto error_register; } } retcode = usb_register_dev(intf, &usbtmc_class); if (retcode) { dev_err(&intf->dev, "Not able to get a minor (base %u, slice default): %d\n", USBTMC_MINOR_BASE, retcode); goto error_register; } dev_dbg(&intf->dev, "Using minor number %d\n", intf->minor); return 0; error_register: usbtmc_free_int(data); err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } static void usbtmc_disconnect(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; usb_deregister_dev(intf, &usbtmc_class); mutex_lock(&data->io_mutex); data->zombie = 1; wake_up_interruptible_all(&data->waitq); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); } mutex_unlock(&data->io_mutex); usbtmc_free_int(data); kref_put(&data->kref, usbtmc_delete); } static void usbtmc_draw_down(struct usbtmc_file_data *file_data) { int time; time = 
usb_wait_anchor_empty_timeout(&file_data->submitted, 1000); if (!time) usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); } static int usbtmc_suspend(struct usb_interface *intf, pm_message_t message) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; if (!data) return 0; mutex_lock(&data->io_mutex); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usbtmc_draw_down(file_data); } if (data->iin_ep_present && data->iin_urb) usb_kill_urb(data->iin_urb); mutex_unlock(&data->io_mutex); return 0; } static int usbtmc_resume(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); int retcode = 0; if (data->iin_ep_present && data->iin_urb) retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL); if (retcode) dev_err(&intf->dev, "Failed to submit iin_urb\n"); return retcode; } static int usbtmc_pre_reset(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; if (!data) return 0; mutex_lock(&data->io_mutex); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usbtmc_ioctl_cancel_io(file_data); } return 0; } static int usbtmc_post_reset(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); mutex_unlock(&data->io_mutex); return 0; } static struct usb_driver usbtmc_driver = { .name = "usbtmc", .id_table = usbtmc_devices, .probe = usbtmc_probe, .disconnect = usbtmc_disconnect, .suspend = usbtmc_suspend, .resume = usbtmc_resume, .pre_reset = usbtmc_pre_reset, .post_reset = usbtmc_post_reset, .dev_groups = usbtmc_groups, }; module_usb_driver(usbtmc_driver); MODULE_DESCRIPTION("USB Test & Measurement class driver"); MODULE_LICENSE("GPL"); |
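Usage sketch, not part of the driver: usb_register_dev() above creates one character device per instrument from the "usbtmc%d" template, so user space normally issues plain read()/write() calls for USBTMC device-dependent messages and uses the ioctls from <linux/usb/tmc.h> for class-specific requests. The short user-space program below is a hypothetical example that assumes an instrument is bound at /dev/usbtmc0 and answers the SCPI query "*IDN?"; error handling is kept minimal.

/* hypothetical stand-alone user-space example, not kernel code: cc -o idn idn.c */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/usb/tmc.h>

int main(void)
{
	__u32 timeout = 2000;	/* ms; the driver rejects values below USBTMC_MIN_TIMEOUT (100) */
	__u8 stb;
	char buf[256];
	ssize_t n;
	int fd = open("/dev/usbtmc0", O_RDWR);	/* device node name is an assumption */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* per-file-handle timeout, handled by usbtmc_ioctl_set_timeout() */
	if (ioctl(fd, USBTMC_IOCTL_SET_TIMEOUT, &timeout) < 0)
		perror("USBTMC_IOCTL_SET_TIMEOUT");

	/* write() builds a DEV_DEP_MSG_OUT header and sends the SCPI query */
	if (write(fd, "*IDN?\n", 6) < 0)
		perror("write");

	/* read() sends REQUEST_DEV_DEP_MSG_IN and returns the reply payload */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("IDN: %s", buf);
	}

	/* IEEE 488.2 status byte via READ_STATUS_BYTE (usbtmc488_ioctl_read_stb) */
	if (ioctl(fd, USBTMC488_IOCTL_READ_STB, &stb) == 0)
		printf("STB: 0x%02x\n", stb);

	close(fd);
	return 0;
}

Note that the timeout set here is per file handle (it lives in struct usbtmc_file_data), which matches how usbtmc_ioctl_set_timeout() stores it.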
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H #include <kvm/iodev.h> #include <linux/kvm_host.h> #include "hyperv.h" #include "smm.h" #define KVM_APIC_INIT 0 #define KVM_APIC_SIPI 1 #define APIC_SHORT_MASK 0xc0000 #define APIC_DEST_NOSHORT 0x0 #define APIC_DEST_MASK 0x800 #define APIC_BUS_CYCLE_NS_DEFAULT 1 #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul enum lapic_mode { LAPIC_MODE_DISABLED = 0, LAPIC_MODE_INVALID = X2APIC_ENABLE, LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE, LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE, }; enum lapic_lvt_entry { LVT_TIMER, LVT_THERMAL_MONITOR, LVT_PERFORMANCE_COUNTER, LVT_LINT0, LVT_LINT1, LVT_ERROR, LVT_CMCI, KVM_APIC_MAX_NR_LVT_ENTRIES, }; #define APIC_LVTx(x) ((x) == LVT_CMCI ? APIC_LVTCMCI : APIC_LVTT + 0x10 * (x)) struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ ktime_t target_expiration; u32 timer_mode; u32 timer_mode_mask; u64 tscdeadline; u64 expired_tscdeadline; u32 timer_advance_ns; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; }; struct kvm_lapic { unsigned long base_address; struct kvm_io_device dev; struct kvm_timer lapic_timer; u32 divide_count; struct kvm_vcpu *vcpu; bool apicv_active; bool sw_enabled; bool irr_pending; bool lvt0_in_nmi_mode; /* Number of bits set in ISR. */ s16 isr_count; /* The highest vector set in ISR; if -1 - invalid, must scan ISR. */ int highest_isr_cache; /** * APIC register page. The layout matches the register layout seen by * the guest 1:1, because it is accessed by the vmx microcode. * Note: Only one register, the TPR, is used by the microcode.
*/ void *regs; gpa_t vapic_addr; struct gfn_to_hva_cache vapic_cache; unsigned long pending_events; unsigned int sipi_vector; int nr_lvt_entries; }; struct dest_map; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); void kvm_recalculate_apic_map(struct kvm *kvm); void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int shorthand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec); bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, struct dest_map *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); void kvm_apic_update_apicv(struct kvm_vcpu *vcpu); int kvm_alloc_apic_access_page(struct kvm *kvm); void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data); int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len); void kvm_lapic_exit(void); u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic); #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) static inline void kvm_lapic_clear_vector(int vec, void *bitmap) { clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } static inline void kvm_lapic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } static inline void kvm_lapic_set_irr(int 
vec, struct kvm_lapic *apic) { kvm_lapic_set_vector(vec, apic->regs + APIC_IRR); /* * irr_pending must be true if any interrupt is pending; set it after * APIC_IRR to avoid race with apic_clear_irr */ apic->irr_pending = true; } static inline u32 __kvm_lapic_get_reg(char *regs, int reg_off) { return *((u32 *) (regs + reg_off)); } static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off) { return __kvm_lapic_get_reg(apic->regs, reg_off); } DECLARE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu); static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_has_noapic_vcpu)) return vcpu->arch.apic; return true; } extern struct static_key_false_deferred apic_hw_disabled; static inline bool kvm_apic_hw_enabled(struct kvm_lapic *apic) { if (static_branch_unlikely(&apic_hw_disabled.key)) return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; return true; } extern struct static_key_false_deferred apic_sw_disabled; static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) { if (static_branch_unlikely(&apic_sw_disabled.key)) return apic->sw_enabled; return true; } static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); } static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) { return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); } static inline int apic_x2apic_mode(struct kvm_lapic *apic) { return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && vcpu->arch.apic->apicv_active; } static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events; } static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu) { return !is_smm(vcpu) && !kvm_x86_call(apic_init_signal_blocked)(vcpu); } static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) { return (irq->delivery_mode == APIC_DM_LOWEST || irq->msi_redir_hint); } static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, unsigned long *vcpu_bitmap); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); int kvm_vector_to_index(u32 vector, u32 dest_vcpus, const unsigned long *bitmap, u32 bitmap_size); void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu); static inline enum lapic_mode kvm_apic_mode(u64 apic_base) { return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); } static inline u8 kvm_xapic_id(struct kvm_lapic *apic) { return kvm_lapic_get_reg(apic, APIC_ID) >> 24; } #endif |
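The VEC_POS()/REG_POS() pair above is easiest to see with a worked example: the 256 vectors of the IRR/ISR are spread over eight 32-bit registers placed 16 bytes apart in the APIC register page, so a vector number splits into a byte offset plus a bit position. A minimal user-space sketch of that arithmetic follows; the DEMO_* names and the local buffer are assumptions made only for this illustration.

/* Sketch only: restates the VEC_POS()/REG_POS() arithmetic outside the kernel. */
#include <stdio.h>

#define DEMO_VEC_POS(v)	((v) & (32 - 1))	/* bit index within one 32-bit register */
#define DEMO_REG_POS(v)	(((v) >> 5) << 4)	/* byte offset of that register (16-byte stride) */

int main(void)
{
	unsigned int regs[1024] = { 0 };	/* stand-in for the 4 KiB APIC register page */
	int vec = 0x41;				/* example vector */
	unsigned int byte_off = DEMO_REG_POS(vec);

	/* mirrors kvm_lapic_set_vector(vec, bitmap): set this vector's bit */
	regs[byte_off / sizeof(regs[0])] |= 1u << DEMO_VEC_POS(vec);

	printf("vector 0x%02x -> register offset 0x%02x, bit %d\n",
	       vec, byte_off, DEMO_VEC_POS(vec));
	return 0;
}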
2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. */ #ifndef BTRFS_DELAYED_REF_H #define BTRFS_DELAYED_REF_H #include <linux/types.h> #include <linux/refcount.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <uapi/linux/btrfs_tree.h> struct btrfs_trans_handle; struct btrfs_fs_info; /* these are the possible values of struct btrfs_delayed_ref_node->action */ enum btrfs_delayed_ref_action { /* Add one backref to the tree */ BTRFS_ADD_DELAYED_REF = 1, /* Delete one backref from the tree */ BTRFS_DROP_DELAYED_REF, /* Record a full extent allocation */ BTRFS_ADD_DELAYED_EXTENT, /* Not changing ref count on head ref */ BTRFS_UPDATE_DELAYED_HEAD, } __packed; struct btrfs_data_ref { /* For EXTENT_DATA_REF */ /* Inode which refers to this data extent */ u64 objectid; /* * file_offset - extent_offset * * file_offset is the key.offset of the EXTENT_DATA key. * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data. */ u64 offset; }; struct btrfs_tree_ref { /* * Level of this tree block. * * Shared for skinny (TREE_BLOCK_REF) and normal tree ref. */ int level; /* For non-skinny metadata, no special member needed */ }; struct btrfs_delayed_ref_node { struct rb_node ref_node; /* * If action is BTRFS_ADD_DELAYED_REF, also link this node to * ref_head->ref_add_list, then we do not need to iterate the * whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes. */ struct list_head add_list; /* the starting bytenr of the extent */ u64 bytenr; /* the size of the extent */ u64 num_bytes; /* seq number to keep track of insertion order */ u64 seq; /* The ref_root for this ref */ u64 ref_root; /* * The parent for this ref, if this isn't set the ref_root is the * reference owner. 
*/ u64 parent; /* ref count on this data structure */ refcount_t refs; /* * how many refs is this entry adding or deleting. For * head refs, this may be a negative number because it is keeping * track of the total mods done to the reference count. * For individual refs, this will always be a positive number * * It may be more than one, since it is possible for a single * parent to have more than one ref on an extent */ int ref_mod; unsigned int action:8; unsigned int type:8; union { struct btrfs_tree_ref tree_ref; struct btrfs_data_ref data_ref; }; }; struct btrfs_delayed_extent_op { struct btrfs_disk_key key; bool update_key; bool update_flags; u64 flags_to_set; }; /* * the head refs are used to hold a lock on a given extent, which allows us * to make sure that only one process is running the delayed refs * at a time for a single extent. They also store the sum of all the * reference count modifications we've queued up. */ struct btrfs_delayed_ref_head { u64 bytenr; u64 num_bytes; /* * For insertion into struct btrfs_delayed_ref_root::href_root. * Keep it in the same cache line as 'bytenr' for more efficient * searches in the rbtree. */ struct rb_node href_node; /* * the mutex is held while running the refs, and it is also * held when checking the sum of reference modifications. */ struct mutex mutex; refcount_t refs; /* Protects 'ref_tree' and 'ref_add_list'. */ spinlock_t lock; struct rb_root_cached ref_tree; /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ struct list_head ref_add_list; struct btrfs_delayed_extent_op *extent_op; /* * This is used to track the final ref_mod from all the refs associated * with this head ref, this is not adjusted as delayed refs are run, * this is meant to track if we need to do the csum accounting or not. */ int total_ref_mod; /* * This is the current outstanding mod references for this bytenr. This * is used with lookup_extent_info to get an accurate reference count * for a bytenr, so it is adjusted as delayed refs are run so that any * on disk reference count + ref_mod is accurate. */ int ref_mod; /* * The root that triggered the allocation when must_insert_reserved is * set to true. */ u64 owning_root; /* * Track reserved bytes when setting must_insert_reserved. On success * or cleanup, we will need to free the reservation. */ u64 reserved_bytes; /* Tree block level, for metadata only. */ u8 level; /* * when a new extent is allocated, it is just reserved in memory * The actual extent isn't inserted into the extent allocation tree * until the delayed ref is processed. must_insert_reserved is * used to flag a delayed ref so the accounting can be updated * when a full insert is done. * * It is possible the extent will be freed before it is ever * inserted into the extent allocation tree. In this case * we need to update the in ram accounting to properly reflect * the free has happened. */ bool must_insert_reserved; bool is_data; bool is_system; bool processing; }; enum btrfs_delayed_ref_flags { /* Indicate that we are flushing delayed refs for the commit */ BTRFS_DELAYED_REFS_FLUSHING, }; struct btrfs_delayed_ref_root { /* head ref rbtree */ struct rb_root_cached href_root; /* * Track dirty extent records. * The keys correspond to the logical address of the extent ("bytenr") * right shifted by fs_info->sectorsize_bits. 
This is both to get a more * dense index space (optimizes xarray structure) and because indexes in * xarrays are of "unsigned long" type, meaning they are 32 bits wide on * 32 bits platforms, limiting the extent range to 4G which is too low * and makes it unusable (truncated index values) on 32 bits platforms. */ struct xarray dirty_extents; /* this spin lock protects the rbtree and the entries inside */ spinlock_t lock; /* how many delayed ref updates we've queued, used by the * throttling code */ atomic_t num_entries; /* total number of head nodes in tree */ unsigned long num_heads; /* total number of head nodes ready for processing */ unsigned long num_heads_ready; u64 pending_csums; unsigned long flags; u64 run_delayed_start; /* * To make qgroup to skip given root. * This is for snapshot, as btrfs_qgroup_inherit() will manually * modify counters for snapshot and its source, so we should skip * the snapshot in new_root/old_roots or it will get calculated twice */ u64 qgroup_to_skip; }; enum btrfs_ref_type { BTRFS_REF_NOT_SET, BTRFS_REF_DATA, BTRFS_REF_METADATA, BTRFS_REF_LAST, } __packed; struct btrfs_ref { enum btrfs_ref_type type; enum btrfs_delayed_ref_action action; /* * Whether this extent should go through qgroup record. * * Normally false, but for certain cases like delayed subtree scan, * setting this flag can hugely reduce qgroup overhead. */ bool skip_qgroup; #ifdef CONFIG_BTRFS_FS_REF_VERIFY /* Through which root is this modification. */ u64 real_root; #endif u64 bytenr; u64 num_bytes; u64 owning_root; /* * The root that owns the reference for this reference, this will be set * or ->parent will be set, depending on what type of reference this is. */ u64 ref_root; /* Bytenr of the parent tree block */ u64 parent; union { struct btrfs_data_ref data_ref; struct btrfs_tree_ref tree_ref; }; }; extern struct kmem_cache *btrfs_delayed_ref_head_cachep; extern struct kmem_cache *btrfs_delayed_ref_node_cachep; extern struct kmem_cache *btrfs_delayed_extent_op_cachep; int __init btrfs_delayed_ref_init(void); void __cold btrfs_delayed_ref_exit(void); static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info, int num_delayed_refs) { u64 num_bytes; num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs); /* * We have to check the mount option here because we could be enabling * the free space tree for the first time and don't have the compat_ro * option set yet. * * We need extra reservations if we have the free space tree because * we'll have to modify that tree as well. */ if (btrfs_test_opt(fs_info, FREE_SPACE_TREE)) num_bytes *= 2; return num_bytes; } static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info, int num_csum_items) { /* * Deleting csum items does not result in new nodes/leaves and does not * require changing the free space tree, only the csum tree, so this is * all we need. 
*/ return btrfs_calc_metadata_size(fs_info, num_csum_items); } void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root, bool skip_qgroup); void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset, u64 mod_root, bool skip_qgroup); static inline struct btrfs_delayed_extent_op * btrfs_alloc_delayed_extent_op(void) { return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS); } static inline void btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) { if (op) kmem_cache_free(btrfs_delayed_extent_op_cachep, op); } void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref); static inline u64 btrfs_ref_head_to_space_flags( struct btrfs_delayed_ref_head *head_ref) { if (head_ref->is_data) return BTRFS_BLOCK_GROUP_DATA; else if (head_ref->is_system) return BTRFS_BLOCK_GROUP_SYSTEM; return BTRFS_BLOCK_GROUP_METADATA; } static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) { if (refcount_dec_and_test(&head->refs)) kmem_cache_free(btrfs_delayed_ref_head_cachep, head); } int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, struct btrfs_delayed_extent_op *extent_op); int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, u64 reserved); int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u8 level, struct btrfs_delayed_extent_op *extent_op); void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head * btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr); int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) { mutex_unlock(&head->mutex); } void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head *btrfs_select_ref_head( struct btrfs_delayed_ref_root *delayed_refs); int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums); void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, u64 root, u64 parent); static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) { if (node->type == BTRFS_EXTENT_DATA_REF_KEY || node->type == BTRFS_SHARED_DATA_REF_KEY) return node->data_ref.objectid; return node->tree_ref.level; } static inline u64 btrfs_delayed_ref_offset(struct btrfs_delayed_ref_node *node) { if (node->type == BTRFS_EXTENT_DATA_REF_KEY || node->type == BTRFS_SHARED_DATA_REF_KEY) return node->data_ref.offset; return 0; } static inline u8 btrfs_ref_type(struct btrfs_ref *ref) { ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA); if (ref->type == 
BTRFS_REF_DATA) {
		if (ref->parent)
			return BTRFS_SHARED_DATA_REF_KEY;
		else
			return BTRFS_EXTENT_DATA_REF_KEY;
	} else {
		if (ref->parent)
			return BTRFS_SHARED_BLOCK_REF_KEY;
		else
			return BTRFS_TREE_BLOCK_REF_KEY;
	}
	return 0;
}

#endif
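btrfs_ref_type() above boils down to a four-way decision table: data vs. metadata, crossed with whether ->parent is set (shared backref) or the reference is keyed by ->ref_root. A standalone sketch of that mapping follows; it uses a local enum in place of the UAPI BTRFS_*_REF_KEY constants, and the demo_* names exist only for this example.

/* Sketch only: mirrors the backref-key selection logic of btrfs_ref_type(). */
#include <stdbool.h>
#include <stdio.h>

enum demo_backref_key {
	DEMO_TREE_BLOCK_REF_KEY,	/* metadata, keyed by ref_root */
	DEMO_SHARED_BLOCK_REF_KEY,	/* metadata, shared via parent block */
	DEMO_EXTENT_DATA_REF_KEY,	/* data, keyed by ref_root/objectid/offset */
	DEMO_SHARED_DATA_REF_KEY,	/* data, shared via parent block */
};

static enum demo_backref_key demo_ref_type(bool is_data, bool has_parent)
{
	if (is_data)
		return has_parent ? DEMO_SHARED_DATA_REF_KEY : DEMO_EXTENT_DATA_REF_KEY;
	return has_parent ? DEMO_SHARED_BLOCK_REF_KEY : DEMO_TREE_BLOCK_REF_KEY;
}

int main(void)
{
	printf("data,     parent set -> %d (shared data ref)\n", demo_ref_type(true, true));
	printf("data,     no parent  -> %d (keyed data ref)\n", demo_ref_type(true, false));
	printf("metadata, parent set -> %d (shared block ref)\n", demo_ref_type(false, true));
	printf("metadata, no parent  -> %d (keyed tree block ref)\n", demo_ref_type(false, false));
	return 0;
}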
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/locks.c
 *
 * We implement four types of file locks: BSD locks, posix locks, open
 * file description locks, and leases. For details about BSD locks,
 * see the flock(2) man page; for details about the other three, see
 * fcntl(2).
 *
 * Locking conflicts and dependencies:
 * If multiple threads attempt to lock the same byte (or flock the same file)
 * only one can be granted the lock, and the others must wait their turn.
 * The first lock has been "applied" or "granted"; the others are "waiting"
 * and are "blocked" by the "applied" lock.
 *
 * Waiting and applied locks are all kept in trees whose properties are:
 *
 *  - the root of a tree may be an applied or waiting lock.
 *  - every other node in the tree is a waiting lock that
 *    conflicts with every ancestor of that node.
 *
 * Every such tree begins life as a waiting singleton which obviously
 * satisfies the above properties.
 *
 * The only ways we modify trees preserve these properties:
 *
 *  1. We may add a new leaf node, but only after first verifying that it
 *     conflicts with all of its ancestors.
 *  2. We may remove the root of a tree, creating a new singleton
 *     tree from the root and N new trees rooted in the immediate
 *     children.
 *  3. If the root of a tree is not currently an applied lock, we may
 *     apply it (if possible).
 *  4. We may upgrade the root of the tree (either extend its range,
 *     or upgrade its entire range from read to write).
 *
 * When an applied lock is modified in a way that reduces or downgrades any
 * part of its range, we remove all its children (2 above). This particularly
 * happens when a lock is unlocked.
 *
 * For each of those child trees we "wake up" the thread which is
 * waiting for the lock so it can continue handling as follows: if the
 * root of the tree applies, we do so (3). If it doesn't, it must
 * conflict with some applied lock. We remove (wake up) all of its children
 * (2), and add it as a new leaf to the tree rooted in the applied
 * lock (1). We then repeat the process recursively with those
 * children.
* */ #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/filelock.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/time.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/hashtable.h> #include <linux/percpu.h> #include <linux/sysctl.h> #define CREATE_TRACE_POINTS #include <trace/events/filelock.h> #include <linux/uaccess.h> static struct file_lock *file_lock(struct file_lock_core *flc) { return container_of(flc, struct file_lock, c); } static struct file_lease *file_lease(struct file_lock_core *flc) { return container_of(flc, struct file_lease, c); } static bool lease_breaking(struct file_lease *fl) { return fl->c.flc_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); } static int target_leasetype(struct file_lease *fl) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) return F_UNLCK; if (fl->c.flc_flags & FL_DOWNGRADE_PENDING) return F_RDLCK; return fl->c.flc_type; } static int leases_enable = 1; static int lease_break_time = 45; #ifdef CONFIG_SYSCTL static struct ctl_table locks_sysctls[] = { { .procname = "leases-enable", .data = &leases_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #ifdef CONFIG_MMU { .procname = "lease-break-time", .data = &lease_break_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif /* CONFIG_MMU */ }; static int __init init_fs_locks_sysctls(void) { register_sysctl_init("fs", locks_sysctls); return 0; } early_initcall(init_fs_locks_sysctls); #endif /* CONFIG_SYSCTL */ /* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock. * Global serialization is done using file_rwsem. * * Note that alterations to the list also require that the relevant flc_lock is * held. */ struct file_lock_list_struct { spinlock_t lock; struct hlist_head hlist; }; static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list); DEFINE_STATIC_PERCPU_RWSEM(file_rwsem); /* * The blocked_hash is used to find POSIX lock loops for deadlock detection. * It is protected by blocked_lock_lock. * * We hash locks by lockowner in order to optimize searching for the lock a * particular lockowner is waiting on. * * FIXME: make this value scale via some heuristic? We generally will want more * buckets when we have more lockowners holding locks, but that's a little * difficult to determine without knowing what the workload will look like. */ #define BLOCKED_HASH_BITS 7 static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); /* * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * * In addition, it also protects the fl->fl_blocked_requests list, and the * fl->fl_blocker pointer for file_lock structures that are acting as lock * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. 
*/ static DEFINE_SPINLOCK(blocked_lock_lock); static struct kmem_cache *flctx_cache __ro_after_init; static struct kmem_cache *filelock_cache __ro_after_init; static struct kmem_cache *filelease_cache __ro_after_init; static struct file_lock_context * locks_get_lock_context(struct inode *inode, int type) { struct file_lock_context *ctx; /* paired with cmpxchg() below */ ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL); if (!ctx) goto out; spin_lock_init(&ctx->flc_lock); INIT_LIST_HEAD(&ctx->flc_flock); INIT_LIST_HEAD(&ctx->flc_posix); INIT_LIST_HEAD(&ctx->flc_lease); /* * Assign the pointer if it's not already assigned. If it is, then * free the context we just allocated. */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); return ctx; } static void locks_dump_ctx_list(struct list_head *list, char *list_type) { struct file_lock_core *flc; list_for_each_entry(flc, list, flc_list) pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, flc->flc_owner, flc->flc_flags, flc->flc_type, flc->flc_pid); } static void locks_check_ctx_lists(struct inode *inode) { struct file_lock_context *ctx = inode->i_flctx; if (unlikely(!list_empty(&ctx->flc_flock) || !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_lease))) { pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); locks_dump_ctx_list(&ctx->flc_flock, "FLOCK"); locks_dump_ctx_list(&ctx->flc_posix, "POSIX"); locks_dump_ctx_list(&ctx->flc_lease, "LEASE"); } } static void locks_check_ctx_file_list(struct file *filp, struct list_head *list, char *list_type) { struct file_lock_core *flc; struct inode *inode = file_inode(filp); list_for_each_entry(flc, list, flc_list) if (flc->flc_file == filp) pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx " " fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino, flc->flc_owner, flc->flc_flags, flc->flc_type, flc->flc_pid); } void locks_free_lock_context(struct inode *inode) { struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); kmem_cache_free(flctx_cache, ctx); } } static void locks_init_lock_heads(struct file_lock_core *flc) { INIT_HLIST_NODE(&flc->flc_link); INIT_LIST_HEAD(&flc->flc_list); INIT_LIST_HEAD(&flc->flc_blocked_requests); INIT_LIST_HEAD(&flc->flc_blocked_member); init_waitqueue_head(&flc->flc_wait); } /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); if (fl) locks_init_lock_heads(&fl->c); return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lock); /* Allocate an empty lock structure. 
*/ struct file_lease *locks_alloc_lease(void) { struct file_lease *fl = kmem_cache_zalloc(filelease_cache, GFP_KERNEL); if (fl) locks_init_lock_heads(&fl->c); return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lease); void locks_release_private(struct file_lock *fl) { struct file_lock_core *flc = &fl->c; BUG_ON(waitqueue_active(&flc->flc_wait)); BUG_ON(!list_empty(&flc->flc_list)); BUG_ON(!list_empty(&flc->flc_blocked_requests)); BUG_ON(!list_empty(&flc->flc_blocked_member)); BUG_ON(!hlist_unhashed(&flc->flc_link)); if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } if (fl->fl_lmops) { if (fl->fl_lmops->lm_put_owner) { fl->fl_lmops->lm_put_owner(flc->flc_owner); flc->flc_owner = NULL; } fl->fl_lmops = NULL; } } EXPORT_SYMBOL_GPL(locks_release_private); /** * locks_owner_has_blockers - Check for blocking lock requests * @flctx: file lock context * @owner: lock owner * * Return values: * %true: @owner has at least one blocker * %false: @owner has no blockers */ bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner) { struct file_lock_core *flc; spin_lock(&flctx->flc_lock); list_for_each_entry(flc, &flctx->flc_posix, flc_list) { if (flc->flc_owner != owner) continue; if (!list_empty(&flc->flc_blocked_requests)) { spin_unlock(&flctx->flc_lock); return true; } } spin_unlock(&flctx->flc_lock); return false; } EXPORT_SYMBOL_GPL(locks_owner_has_blockers); /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } EXPORT_SYMBOL(locks_free_lock); /* Free a lease which is not in use. */ void locks_free_lease(struct file_lease *fl) { kmem_cache_free(filelease_cache, fl); } EXPORT_SYMBOL(locks_free_lease); static void locks_dispose_list(struct list_head *dispose) { struct file_lock_core *flc; while (!list_empty(dispose)) { flc = list_first_entry(dispose, struct file_lock_core, flc_list); list_del_init(&flc->flc_list); if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) locks_free_lease(file_lease(flc)); else locks_free_lock(file_lock(flc)); } } void locks_init_lock(struct file_lock *fl) { memset(fl, 0, sizeof(struct file_lock)); locks_init_lock_heads(&fl->c); } EXPORT_SYMBOL(locks_init_lock); void locks_init_lease(struct file_lease *fl) { memset(fl, 0, sizeof(*fl)); locks_init_lock_heads(&fl->c); } EXPORT_SYMBOL(locks_init_lease); /* * Initialize a new lock from an existing file_lock structure. 
*/ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->c.flc_owner = fl->c.flc_owner; new->c.flc_pid = fl->c.flc_pid; new->c.flc_file = NULL; new->c.flc_flags = fl->c.flc_flags; new->c.flc_type = fl->c.flc_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; new->fl_lmops = fl->fl_lmops; new->fl_ops = NULL; if (fl->fl_lmops) { if (fl->fl_lmops->lm_get_owner) fl->fl_lmops->lm_get_owner(fl->c.flc_owner); } } EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { /* "new" must be a freshly-initialized lock */ WARN_ON_ONCE(new->fl_ops); locks_copy_conflock(new, fl); new->c.flc_file = fl->c.flc_file; new->fl_ops = fl->fl_ops; if (fl->fl_ops) { if (fl->fl_ops->fl_copy_lock) fl->fl_ops->fl_copy_lock(new, fl); } } EXPORT_SYMBOL(locks_copy_lock); static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) { struct file_lock *f; /* * As ctx->flc_lock is held, new requests cannot be added to * ->flc_blocked_requests, so we don't need a lock to check if it * is empty. */ if (list_empty(&fl->c.flc_blocked_requests)) return; spin_lock(&blocked_lock_lock); list_splice_init(&fl->c.flc_blocked_requests, &new->c.flc_blocked_requests); list_for_each_entry(f, &new->c.flc_blocked_requests, c.flc_blocked_member) f->c.flc_blocker = &new->c; spin_unlock(&blocked_lock_lock); } static inline int flock_translate_cmd(int cmd) { switch (cmd) { case LOCK_SH: return F_RDLCK; case LOCK_EX: return F_WRLCK; case LOCK_UN: return F_UNLCK; } return -EINVAL; } /* Fill in a file_lock structure with an appropriate FLOCK lock. */ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type) { locks_init_lock(fl); fl->c.flc_file = filp; fl->c.flc_owner = filp; fl->c.flc_pid = current->tgid; fl->c.flc_flags = FL_FLOCK; fl->c.flc_type = type; fl->fl_end = OFFSET_MAX; } static int assign_type(struct file_lock_core *flc, int type) { switch (type) { case F_RDLCK: case F_WRLCK: case F_UNLCK: flc->flc_type = type; break; default: return -EINVAL; } return 0; } static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock64 *l) { switch (l->l_whence) { case SEEK_SET: fl->fl_start = 0; break; case SEEK_CUR: fl->fl_start = filp->f_pos; break; case SEEK_END: fl->fl_start = i_size_read(file_inode(filp)); break; default: return -EINVAL; } if (l->l_start > OFFSET_MAX - fl->fl_start) return -EOVERFLOW; fl->fl_start += l->l_start; if (fl->fl_start < 0) return -EINVAL; /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ if (l->l_len > 0) { if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) return -EOVERFLOW; fl->fl_end = fl->fl_start + (l->l_len - 1); } else if (l->l_len < 0) { if (fl->fl_start + l->l_len < 0) return -EINVAL; fl->fl_end = fl->fl_start - 1; fl->fl_start += l->l_len; } else fl->fl_end = OFFSET_MAX; fl->c.flc_owner = current->files; fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; fl->c.flc_flags = FL_POSIX; fl->fl_ops = NULL; fl->fl_lmops = NULL; return assign_type(&fl->c, l->l_type); } /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX * style lock. 
*/ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock *l) { struct flock64 ll = { .l_type = l->l_type, .l_whence = l->l_whence, .l_start = l->l_start, .l_len = l->l_len, }; return flock64_to_posix_lock(filp, fl, &ll); } /* default lease lock manager operations */ static bool lease_break_callback(struct file_lease *fl) { kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); return false; } static void lease_setup(struct file_lease *fl, void **priv) { struct file *filp = fl->c.flc_file; struct fasync_struct *fa = *priv; /* * fasync_insert_entry() returns the old entry if any. If there was no * old entry, then it used "priv" and inserted it into the fasync list. * Clear the pointer to indicate that it shouldn't be freed. */ if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa)) *priv = NULL; __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); } static const struct lease_manager_operations lease_manager_ops = { .lm_break = lease_break_callback, .lm_change = lease_modify, .lm_setup = lease_setup, }; /* * Initialize a lease, use the default lock manager operations */ static int lease_init(struct file *filp, int type, struct file_lease *fl) { if (assign_type(&fl->c, type) != 0) return -EINVAL; fl->c.flc_owner = filp; fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; fl->c.flc_flags = FL_LEASE; fl->fl_lmops = &lease_manager_ops; return 0; } /* Allocate a file_lock initialised to this type of lease */ static struct file_lease *lease_alloc(struct file *filp, int type) { struct file_lease *fl = locks_alloc_lease(); int error = -ENOMEM; if (fl == NULL) return ERR_PTR(error); error = lease_init(filp, type, fl); if (error) { locks_free_lease(fl); return ERR_PTR(error); } return fl; } /* Check if two locks overlap each other. */ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) { return ((fl1->fl_end >= fl2->fl_start) && (fl2->fl_end >= fl1->fl_start)); } /* * Check whether two locks have the same owner. */ static int posix_same_owner(struct file_lock_core *fl1, struct file_lock_core *fl2) { return fl1->flc_owner == fl2->flc_owner; } /* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock_core *flc) { struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list); percpu_rwsem_assert_held(&file_rwsem); spin_lock(&fll->lock); flc->flc_link_cpu = smp_processor_id(); hlist_add_head(&flc->flc_link, &fll->hlist); spin_unlock(&fll->lock); } /* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock_core *flc) { struct file_lock_list_struct *fll; percpu_rwsem_assert_held(&file_rwsem); /* * Avoid taking lock if already unhashed. This is safe since this check * is done while holding the flc_lock, and new insertions into the list * also require that it be held. */ if (hlist_unhashed(&flc->flc_link)) return; fll = per_cpu_ptr(&file_lock_list, flc->flc_link_cpu); spin_lock(&fll->lock); hlist_del_init(&flc->flc_link); spin_unlock(&fll->lock); } static unsigned long posix_owner_key(struct file_lock_core *flc) { return (unsigned long) flc->flc_owner; } static void locks_insert_global_blocked(struct file_lock_core *waiter) { lockdep_assert_held(&blocked_lock_lock); hash_add(blocked_hash, &waiter->flc_link, posix_owner_key(waiter)); } static void locks_delete_global_blocked(struct file_lock_core *waiter) { lockdep_assert_held(&blocked_lock_lock); hash_del(&waiter->flc_link); } /* Remove waiter from blocker's block list. 
* When blocker ends up pointing to itself then the list is empty. * * Must be called with blocked_lock_lock held. */ static void __locks_unlink_block(struct file_lock_core *waiter) { locks_delete_global_blocked(waiter); list_del_init(&waiter->flc_blocked_member); } static void __locks_wake_up_blocks(struct file_lock_core *blocker) { while (!list_empty(&blocker->flc_blocked_requests)) { struct file_lock_core *waiter; struct file_lock *fl; waiter = list_first_entry(&blocker->flc_blocked_requests, struct file_lock_core, flc_blocked_member); fl = file_lock(waiter); __locks_unlink_block(waiter); if ((waiter->flc_flags & (FL_POSIX | FL_FLOCK)) && fl->fl_lmops && fl->fl_lmops->lm_notify) fl->fl_lmops->lm_notify(fl); else locks_wake_up(fl); /* * The setting of flc_blocker to NULL marks the "done" * point in deleting a block. Paired with acquire at the top * of locks_delete_block(). */ smp_store_release(&waiter->flc_blocker, NULL); } } static int __locks_delete_block(struct file_lock_core *waiter) { int status = -ENOENT; /* * If fl_blocker is NULL, it won't be set again as this thread "owns" * the lock and is the only one that might try to claim the lock. * * We use acquire/release to manage fl_blocker so that we can * optimize away taking the blocked_lock_lock in many cases. * * The smp_load_acquire guarantees two things: * * 1/ that fl_blocked_requests can be tested locklessly. If something * was recently added to that list it must have been in a locked region * *before* the locked region when fl_blocker was set to NULL. * * 2/ that no other thread is accessing 'waiter', so it is safe to free * it. __locks_wake_up_blocks is careful not to touch waiter after * fl_blocker is released. * * If a lockless check of fl_blocker shows it to be NULL, we know that * no new locks can be inserted into its fl_blocked_requests list, and * can avoid doing anything further if the list is empty. */ if (!smp_load_acquire(&waiter->flc_blocker) && list_empty(&waiter->flc_blocked_requests)) return status; spin_lock(&blocked_lock_lock); if (waiter->flc_blocker) status = 0; __locks_wake_up_blocks(waiter); __locks_unlink_block(waiter); /* * The setting of fl_blocker to NULL marks the "done" point in deleting * a block. Paired with acquire at the top of this function. */ smp_store_release(&waiter->flc_blocker, NULL); spin_unlock(&blocked_lock_lock); return status; } /** * locks_delete_block - stop waiting for a file lock * @waiter: the lock which was waiting * * lockd/nfsd need to disconnect the lock while working on it. */ int locks_delete_block(struct file_lock *waiter) { return __locks_delete_block(&waiter->c); } EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The * fl_blocked_requests list itself is protected by the blocked_lock_lock, * but by ensuring that the flc_lock is also held on insertions we can avoid * taking the blocked_lock_lock in some cases when we see that the * fl_blocked_requests list is empty. * * Rather than just adding to the list, we check for conflicts with any existing * waiters, and add beneath any waiter that blocks the new waiter. * Thus wakeups don't happen until needed. 
*/ static void __locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *)) { struct file_lock_core *flc; BUG_ON(!list_empty(&waiter->flc_blocked_member)); new_blocker: list_for_each_entry(flc, &blocker->flc_blocked_requests, flc_blocked_member) if (conflict(flc, waiter)) { blocker = flc; goto new_blocker; } waiter->flc_blocker = blocker; list_add_tail(&waiter->flc_blocked_member, &blocker->flc_blocked_requests); if ((blocker->flc_flags & (FL_POSIX|FL_OFDLCK)) == FL_POSIX) locks_insert_global_blocked(waiter); /* The requests in waiter->flc_blocked are known to conflict with * waiter, but might not conflict with blocker, or the requests * and lock which block it. So they all need to be woken. */ __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *)) { spin_lock(&blocked_lock_lock); __locks_insert_block(blocker, waiter, conflict); spin_unlock(&blocked_lock_lock); } /* * Wake up processes blocked waiting for blocker. * * Must be called with the inode->flc_lock held! */ static void locks_wake_up_blocks(struct file_lock_core *blocker) { /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and * the flc_lock is always held here. Note that removal from the * fl_blocked_requests list does not require the flc_lock, so we must * recheck list_empty() after acquiring the blocked_lock_lock. */ if (list_empty(&blocker->flc_blocked_requests)) return; spin_lock(&blocked_lock_lock); __locks_wake_up_blocks(blocker); spin_unlock(&blocked_lock_lock); } static void locks_insert_lock_ctx(struct file_lock_core *fl, struct list_head *before) { list_add_tail(&fl->flc_list, before); locks_insert_global_locks(fl); } static void locks_unlink_lock_ctx(struct file_lock_core *fl) { locks_delete_global_locks(fl); list_del_init(&fl->flc_list); locks_wake_up_blocks(fl); } static void locks_delete_lock_ctx(struct file_lock_core *fl, struct list_head *dispose) { locks_unlink_lock_ctx(fl); if (dispose) list_add(&fl->flc_list, dispose); else locks_free_lock(file_lock(fl)); } /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ static bool locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { if (sys_flc->flc_type == F_WRLCK) return true; if (caller_flc->flc_type == F_WRLCK) return true; return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ static bool posix_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { struct file_lock *caller_fl = file_lock(caller_flc); struct file_lock *sys_fl = file_lock(sys_flc); /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_flc, sys_flc)) return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) return false; return locks_conflict(caller_flc, sys_flc); } /* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK * path so checks for additional GETLK-specific things like F_UNLCK. 
*/ static bool posix_test_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) { struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *sys = &sys_fl->c; /* F_UNLCK checks any locks on the same fd. */ if (lock_is_unlock(caller_fl)) { if (!posix_same_owner(caller, sys)) return false; return locks_overlap(caller_fl, sys_fl); } return posix_locks_conflict(caller, sys); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). */ static bool flock_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_flc->flc_file == sys_flc->flc_file) return false; return locks_conflict(caller_flc, sys_flc); } void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; struct file_lock_context *ctx; struct inode *inode = file_inode(filp); void *owner; void (*func)(void); ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->c.flc_type = F_UNLCK; return; } retry: spin_lock(&ctx->flc_lock); list_for_each_entry(cfl, &ctx->flc_posix, c.flc_list) { if (!posix_test_locks_conflict(fl, cfl)) continue; if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) { owner = cfl->fl_lmops->lm_mod_owner; func = cfl->fl_lmops->lm_expire_lock; __module_get(owner); spin_unlock(&ctx->flc_lock); (*func)(); module_put(owner); goto retry; } locks_copy_conflock(fl, cfl); goto out; } fl->c.flc_type = F_UNLCK; out: spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); /* * Deadlock detection: * * We attempt to detect deadlocks that are due purely to posix file * locks. * * We assume that a task can be waiting for at most one lock at a time. * So for any acquired lock, the process holding that lock may be * waiting on at most one other lock. That lock in turns may be held by * someone waiting for at most one other lock. Given a requested lock * caller_fl which is about to wait for a conflicting lock block_fl, we * follow this chain of waiters to ensure we are not about to create a * cycle. * * Since we do this before we ever put a process to sleep on a lock, we * are ensured that there is never a cycle; that is what guarantees that * the while() loop in posix_locks_deadlock() eventually completes. * * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode. */ #define MAX_DEADLK_ITERATIONS 10 /* Find a lock that the owner of the given @blocker is blocking on. 
*/ static struct file_lock_core *what_owner_is_waiting_for(struct file_lock_core *blocker) { struct file_lock_core *flc; hash_for_each_possible(blocked_hash, flc, flc_link, posix_owner_key(blocker)) { if (posix_same_owner(flc, blocker)) { while (flc->flc_blocker) flc = flc->flc_blocker; return flc; } } return NULL; } /* Must be called with the blocked_lock_lock held! */ static bool posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *blocker = &block_fl->c; int i = 0; lockdep_assert_held(&blocked_lock_lock); /* * This deadlock detector can't reasonably detect deadlocks with * FL_OFDLCK locks, since they aren't owned by a process, per-se. */ if (caller->flc_flags & FL_OFDLCK) return false; while ((blocker = what_owner_is_waiting_for(blocker))) { if (i++ > MAX_DEADLK_ITERATIONS) return false; if (posix_same_owner(caller, blocker)) return true; } return false; } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * after any leases, but before any posix locks. * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; int error = 0; bool found = false; LIST_HEAD(dispose); ctx = locks_get_lock_context(inode, request->c.flc_type); if (!ctx) { if (request->c.flc_type != F_UNLCK) return -ENOMEM; return (request->c.flc_flags & FL_EXISTS) ? -ENOENT : 0; } if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK)) { new_fl = locks_alloc_lock(); if (!new_fl) return -ENOMEM; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); if (request->c.flc_flags & FL_ACCESS) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) { if (request->c.flc_file != fl->c.flc_file) continue; if (request->c.flc_type == fl->c.flc_type) goto out; found = true; locks_delete_lock_ctx(&fl->c, &dispose); break; } if (lock_is_unlock(request)) { if ((request->c.flc_flags & FL_EXISTS) && !found) error = -ENOENT; goto out; } find_conflict: list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) { if (!flock_locks_conflict(&request->c, &fl->c)) continue; error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; locks_insert_block(&fl->c, &request->c, flock_locks_conflict); goto out; } if (request->c.flc_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); locks_insert_lock_ctx(&new_fl->c, &ctx->flc_flock); new_fl = NULL; error = 0; out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); trace_flock_lock_inode(inode, request, error); return error; } static int posix_lock_inode(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; struct file_lock_context *ctx; int error; bool added = false; LIST_HEAD(dispose); void *owner; void (*func)(void); ctx = locks_get_lock_context(inode, request->c.flc_type); if (!ctx) return lock_is_unlock(request) ? 0 : -ENOMEM; /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. 
* * In some cases we can be sure, that no new locks will be needed */ if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK || request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { new_fl = locks_alloc_lock(); new_fl2 = locks_alloc_lock(); } retry: percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the * blocker's list of waiters and the global blocked_hash. */ if (request->c.flc_type != F_UNLCK) { list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (!posix_locks_conflict(&request->c, &fl->c)) continue; if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable && (*fl->fl_lmops->lm_lock_expirable)(fl)) { owner = fl->fl_lmops->lm_mod_owner; func = fl->fl_lmops->lm_expire_lock; __module_get(owner); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); (*func)(); module_put(owner); goto retry; } if (conflock) locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; /* * Deadlock detection and insertion into the blocked * locks list must be done while holding the same lock! */ error = -EDEADLK; spin_lock(&blocked_lock_lock); /* * Ensure that we don't find any locks blocked on this * request during deadlock detection. */ __locks_wake_up_blocks(&request->c); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; __locks_insert_block(&fl->c, &request->c, posix_locks_conflict); } spin_unlock(&blocked_lock_lock); goto out; } } /* If we're just looking for a conflict, we're done. */ error = 0; if (request->c.flc_flags & FL_ACCESS) goto out; /* Find the first old lock with the same owner as the new lock */ list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (posix_same_owner(&request->c, &fl->c)) break; } /* Process locks with this owner. */ list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) { if (!posix_same_owner(&request->c, &fl->c)) break; /* Detect adjacent or overlapping regions (if same lock type) */ if (request->c.flc_type == fl->c.flc_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. */ if (fl->fl_end < request->fl_start - 1) continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ if (fl->fl_start - 1 > request->fl_end) break; /* If we come here, the new and old lock are of the * same type and adjacent or overlapping. Make one * lock yielding from the lower start address of both * locks to the higher end address. */ if (fl->fl_start > request->fl_start) fl->fl_start = request->fl_start; else request->fl_start = fl->fl_start; if (fl->fl_end < request->fl_end) fl->fl_end = request->fl_end; else request->fl_end = fl->fl_end; if (added) { locks_delete_lock_ctx(&fl->c, &dispose); continue; } request = fl; added = true; } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) continue; if (fl->fl_start > request->fl_end) break; if (lock_is_unlock(request)) added = true; if (fl->fl_start < request->fl_start) left = fl; /* If the next lock in the list has a higher end * address than the new one, insert the new one here. */ if (fl->fl_end > request->fl_end) { right = fl; break; } if (fl->fl_start >= request->fl_start) { /* The new lock completely replaces an old * one (This may happen several times). 
*/ if (added) { locks_delete_lock_ctx(&fl->c, &dispose); continue; } /* * Replace the old lock with new_fl, and * remove the old one. It's safe to do the * insert here since we know that we won't be * using new_fl later, and that the lock is * just replacing an existing lock. */ error = -ENOLCK; if (!new_fl) goto out; locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); request = new_fl; new_fl = NULL; locks_insert_lock_ctx(&request->c, &fl->c.flc_list); locks_delete_lock_ctx(&fl->c, &dispose); added = true; } } } /* * The above code only modifies existing locks in case of merging or * replacing. If new lock(s) need to be inserted all modifications are * done below this, so it's safe yet to bail out. */ error = -ENOLCK; /* "no luck" */ if (right && left == right && !new_fl2) goto out; error = 0; if (!added) { if (lock_is_unlock(request)) { if (request->c.flc_flags & FL_EXISTS) error = -ENOENT; goto out; } if (!new_fl) { error = -ENOLCK; goto out; } locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); locks_insert_lock_ctx(&new_fl->c, &fl->c.flc_list); fl = new_fl; new_fl = NULL; } if (right) { if (left == right) { /* The new lock breaks the old one in two pieces, * so we have to use the second new lock. */ left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); locks_insert_lock_ctx(&left->c, &fl->c.flc_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(&right->c); } if (left) { left->fl_end = request->fl_start - 1; locks_wake_up_blocks(&left->c); } out: trace_posix_lock_inode(inode, request, error); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); /* * Free any unused locks. */ if (new_fl) locks_free_lock(new_fl); if (new_fl2) locks_free_lock(new_fl2); locks_dispose_list(&dispose); return error; } /** * posix_lock_file - Apply a POSIX-style lock to a file * @filp: The file to apply the lock to * @fl: The lock to be applied * @conflock: Place to return a copy of the conflicting lock, if found. * * Add a POSIX style lock to a file. * We merge adjacent & overlapping locks whenever possible. * POSIX locks are sorted by owner task, then by starting address * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { return posix_lock_inode(file_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); /** * posix_lock_inode_wait - Apply a POSIX-style lock to a file * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * * Apply a POSIX style lock request to an inode. 
*/ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } static void lease_clear_pending(struct file_lease *fl, int arg) { switch (arg) { case F_UNLCK: fl->c.flc_flags &= ~FL_UNLOCK_PENDING; fallthrough; case F_RDLCK: fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING; } } /* We already had a lease on this file; just change its type */ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { int error = assign_type(&fl->c, arg); if (error) return error; lease_clear_pending(fl, arg); locks_wake_up_blocks(&fl->c); if (arg == F_UNLCK) { struct file *filp = fl->c.flc_file; f_delown(filp); file_f_owner(filp)->signum = 0; fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync); if (fl->fl_fasync != NULL) { printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } locks_delete_lock_ctx(&fl->c, dispose); } return 0; } EXPORT_SYMBOL(lease_modify); static bool past_time(unsigned long then) { if (!then) /* 0 is a special value meaning "this never expires": */ return false; return time_after(jiffies, then); } static void time_out_leases(struct inode *inode, struct list_head *dispose) { struct file_lock_context *ctx = inode->i_flctx; struct file_lease *fl, *tmp; lockdep_assert_held(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) lease_modify(fl, F_RDLCK, dispose); if (past_time(fl->fl_break_time)) lease_modify(fl, F_UNLCK, dispose); } } static bool leases_conflict(struct file_lock_core *lc, struct file_lock_core *bc) { bool rc; struct file_lease *lease = file_lease(lc); struct file_lease *breaker = file_lease(bc); if (lease->fl_lmops->lm_breaker_owns_lease && lease->fl_lmops->lm_breaker_owns_lease(lease)) return false; if ((bc->flc_flags & FL_LAYOUT) != (lc->flc_flags & FL_LAYOUT)) { rc = false; goto trace; } if ((bc->flc_flags & FL_DELEG) && (lc->flc_flags & FL_LEASE)) { rc = false; goto trace; } rc = locks_conflict(bc, lc); trace: trace_leases_conflict(rc, lease, breaker); return rc; } static bool any_leases_conflict(struct inode *inode, struct file_lease *breaker) { struct file_lock_context *ctx = inode->i_flctx; struct file_lock_core *flc; lockdep_assert_held(&ctx->flc_lock); list_for_each_entry(flc, &ctx->flc_lease, flc_list) { if (leases_conflict(flc, &breaker->c)) return true; } return false; } /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: * break all leases * @type: FL_LEASE: break leases and delegations; FL_DELEG: break * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock_context *ctx; struct file_lease *new_fl, *fl, *tmp; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); new_fl = lease_alloc(NULL, want_write ? 
F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); new_fl->c.flc_flags = type; /* typically we will check that ctx is non-NULL before calling */ ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); if (!any_leases_conflict(inode, new_fl)) goto out; break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; if (break_time == 0) break_time++; /* so that 0 means no break time */ } list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { if (!leases_conflict(&fl->c, &new_fl->c)) continue; if (want_write) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) continue; fl->c.flc_flags |= FL_UNLOCK_PENDING; fl->fl_break_time = break_time; } else { if (lease_breaking(fl)) continue; fl->c.flc_flags |= FL_DOWNGRADE_PENDING; fl->fl_downgrade_time = break_time; } if (fl->fl_lmops->lm_break(fl)) locks_delete_lock_ctx(&fl->c, &dispose); } if (list_empty(&ctx->flc_lease)) goto out; if (mode & O_NONBLOCK) { trace_break_lease_noblock(inode, new_fl); error = -EWOULDBLOCK; goto out; } restart: fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list); break_time = fl->fl_break_time; if (break_time != 0) break_time -= jiffies; if (break_time == 0) break_time++; locks_insert_block(&fl->c, &new_fl->c, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->c.flc_wait, list_empty(&new_fl->c.flc_blocked_member), break_time); percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); __locks_delete_block(&new_fl->c); if (error >= 0) { /* * Wait for the next conflicting lease that has not been * broken yet */ if (error == 0) time_out_leases(inode, &dispose); if (any_leases_conflict(inode, new_fl)) goto restart; error = 0; } out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); free_lock: locks_free_lease(new_fl); return error; } EXPORT_SYMBOL(__break_lease); /** * lease_get_mtime - update modified time of an inode with exclusive lease * @inode: the inode * @time: pointer to a timespec which contains the last modified time * * This is to force NFS clients to flush their caches for files with * exclusive leases. The justification is that if someone has an * exclusive lease, then they could be modifying it. */ void lease_get_mtime(struct inode *inode, struct timespec64 *time) { bool has_lease = false; struct file_lock_context *ctx; struct file_lock_core *flc; ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); flc = list_first_entry_or_null(&ctx->flc_lease, struct file_lock_core, flc_list); if (flc && flc->flc_type == F_WRLCK) has_lease = true; spin_unlock(&ctx->flc_lock); } if (has_lease) *time = current_time(inode); } EXPORT_SYMBOL(lease_get_mtime); /** * fcntl_getlease - Enquire what lease is currently active * @filp: the file * * The value returned by this function will be one of * (if no lease break is pending): * * %F_RDLCK to indicate a shared lease is held. * * %F_WRLCK to indicate an exclusive lease is held. * * %F_UNLCK to indicate no lease is held. * * (if a lease break is pending): * * %F_RDLCK to indicate an exclusive lease needs to be * changed to a shared lease (or removed). * * %F_UNLCK to indicate the lease needs to be removed. 
* * XXX: sfr & willy disagree over whether F_INPROGRESS * should be returned to userspace. */ int fcntl_getlease(struct file *filp) { struct file_lease *fl; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int type = F_UNLCK; LIST_HEAD(dispose); ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file != filp) continue; type = target_leasetype(fl); break; } spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); } return type; } /** * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * * Check to see if there's an existing open fd on this file that would * conflict with the lease we're trying to set. */ static int check_conflicting_open(struct file *filp, const int arg, int flags) { struct inode *inode = file_inode(filp); int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; if (flags & FL_DELEG) /* We leave these checks to the caller */ return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? -EAGAIN : 0; else if (arg != F_WRLCK) return 0; /* * Make sure that only read/write count is from lease requestor. * Note that this will result in denying write leases when i_writecount * is negative, which is what we want. (We shouldn't grant write leases * on files open for execution.) */ if (filp->f_mode & FMODE_WRITE) self_wcount = 1; else if (filp->f_mode & FMODE_READ) self_rcount = 1; if (atomic_read(&inode->i_writecount) != self_wcount || atomic_read(&inode->i_readcount) != self_rcount) return -EAGAIN; return 0; } static int generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **priv) { struct file_lease *fl, *my_fl = NULL, *lease; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->c.flc_flags & FL_DELEG; int error; LIST_HEAD(dispose); lease = *flp; trace_generic_add_lease(inode, lease); error = file_f_owner_allocate(filp); if (error) return error; /* Note that arg is never F_UNLCK here */ ctx = locks_get_lock_context(inode, arg); if (!ctx) return -ENOMEM; /* * In the delegation case we need mutual exclusion with * a number of operations that take the i_mutex. We trylock * because delegations are an optional optimization, and if * there's some chance of a conflict--we'd rather not * bother, maybe that's a sign this just isn't a good file to * hand out a delegation on. */ if (is_deleg && !inode_trylock(inode)) return -EAGAIN; percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) goto out; /* * At this point, we know that if there is an exclusive * lease on this file, then we hold it on this filp * (otherwise our open of this file would have blocked). * And if we are trying to acquire an exclusive lease, * then the file is not open by anyone (including us) * except for this filp. 
*/ error = -EAGAIN; list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp && fl->c.flc_owner == lease->c.flc_owner) { my_fl = fl; continue; } /* * No exclusive leases if someone else has a lease on * this file: */ if (arg == F_WRLCK) goto out; /* * Modifying our existing lease is OK, but no getting a * new lease if someone else is opening for write: */ if (fl->c.flc_flags & FL_UNLOCK_PENDING) goto out; } if (my_fl != NULL) { lease = my_fl; error = lease->fl_lmops->lm_change(lease, arg, &dispose); if (error) goto out; goto out_setup; } error = -EINVAL; if (!leases_enable) goto out; locks_insert_lock_ctx(&lease->c, &ctx->flc_lease); /* * The check in break_lease() is lockless. It's possible for another * open to race in after we did the earlier check for a conflicting * open but before the lease was inserted. Check again for a * conflicting open and cancel the lease if there is one. * * We also add a barrier here to ensure that the insertion of the lock * precedes these checks. */ smp_mb(); error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) { locks_unlink_lock_ctx(&lease->c); goto out; } out_setup: if (lease->fl_lmops->lm_setup) lease->fl_lmops->lm_setup(lease, priv); out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); if (is_deleg) inode_unlock(inode); if (!error && !my_fl) *flp = NULL; return error; } static int generic_delete_lease(struct file *filp, void *owner) { int error = -EAGAIN; struct file_lease *fl, *victim = NULL; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; LIST_HEAD(dispose); ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp && fl->c.flc_owner == owner) { victim = fl; break; } } trace_generic_delete_lease(inode, victim); if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); return error; } /** * generic_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted * @priv: private data for lm_setup (may be NULL if lm_setup * doesn't require it) * * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). */ int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { switch (arg) { case F_UNLCK: return generic_delete_lease(filp, *priv); case F_RDLCK: case F_WRLCK: if (!(*flp)->fl_lmops->lm_break) { WARN_ON_ONCE(1); return -ENOLCK; } return generic_add_lease(filp, arg, flp, priv); default: return -EINVAL; } } EXPORT_SYMBOL(generic_setlease); /* * Kernel subsystems can register to be notified on any attempt to set * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd * to close files that it may have cached when there is an attempt to set a * conflicting lease. 
*/ static struct srcu_notifier_head lease_notifier_chain; static inline void lease_notifier_chain_init(void) { srcu_init_notifier_head(&lease_notifier_chain); } static inline void setlease_notifier(int arg, struct file_lease *lease) { if (arg != F_UNLCK) srcu_notifier_call_chain(&lease_notifier_chain, arg, lease); } int lease_register_notifier(struct notifier_block *nb) { return srcu_notifier_chain_register(&lease_notifier_chain, nb); } EXPORT_SYMBOL_GPL(lease_register_notifier); void lease_unregister_notifier(struct notifier_block *nb) { srcu_notifier_chain_unregister(&lease_notifier_chain, nb); } EXPORT_SYMBOL_GPL(lease_unregister_notifier); int kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { if (lease) setlease_notifier(arg, *lease); if (filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease, priv); else return generic_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(kernel_setlease); /** * vfs_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @lease: file_lock to use when adding a lease * @priv: private info for lm_setup when adding a lease (may be * NULL if lm_setup doesn't require it) * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be * set; if not, this function will return -ENOLCK (and generate a scary-looking * stack trace). * * The "priv" pointer is passed directly to the lm_setup function as-is. It * may be NULL if the lm_setup operation doesn't require it. */ int vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { struct inode *inode = file_inode(filp); vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode); int error; if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; error = security_file_lock(filp, arg); if (error) return error; return kernel_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(vfs_setlease); static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) { struct file_lease *fl; struct fasync_struct *new; int error; fl = lease_alloc(filp, arg); if (IS_ERR(fl)) return PTR_ERR(fl); new = fasync_alloc(); if (!new) { locks_free_lease(fl); return -ENOMEM; } new->fa_fd = fd; error = vfs_setlease(filp, arg, &fl, (void **)&new); if (fl) locks_free_lease(fl); if (new) fasync_free(new); return error; } /** * fcntl_setlease - sets a lease on an open file * @fd: open file descriptor * @filp: file pointer * @arg: type of lease to obtain * * Call this fcntl to establish a lease on the file. * Note that you also need to call %F_SETSIG to * receive a signal when the lease is broken. */ int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); return do_fcntl_add_lease(fd, filp, arg); } /** * flock_lock_inode_wait - Apply a FLOCK-style lock to a file * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a FLOCK style lock request to an inode. 
*/ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } /** * locks_lock_inode_wait - Apply a lock to an inode * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a POSIX or FLOCK style lock request to an inode. */ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->c.flc_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: res = flock_lock_inode_wait(inode, fl); break; default: BUG(); } return res; } EXPORT_SYMBOL(locks_lock_inode_wait); /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. * @cmd: the type of lock to apply. * * Apply a %FL_FLOCK style lock to an open file descriptor. * The @cmd can be one of: * * - %LOCK_SH -- a shared lock. * - %LOCK_EX -- an exclusive lock. * - %LOCK_UN -- remove an existing lock. * - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED) * * %LOCK_MAND support has been removed from the kernel. */ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { int can_sleep, error, type; struct file_lock fl; struct fd f; /* * LOCK_MAND locks were broken for a long time in that they never * conflicted with one another and didn't prevent any sort of open, * read or write activity. * * Just ignore these requests now, to preserve legacy behavior, but * throw a warning to let people know that they don't actually work. */ if (cmd & LOCK_MAND) { pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid); return 0; } type = flock_translate_cmd(cmd & ~LOCK_NB); if (type < 0) return type; error = -EBADF; f = fdget(fd); if (!fd_file(f)) return error; if (type != F_UNLCK && !(fd_file(f)->f_mode & (FMODE_READ | FMODE_WRITE))) goto out_putf; flock_make_lock(fd_file(f), &fl, type); error = security_file_lock(fd_file(f), fl.c.flc_type); if (error) goto out_putf; can_sleep = !(cmd & LOCK_NB); if (can_sleep) fl.c.flc_flags |= FL_SLEEP; if (fd_file(f)->f_op->flock) error = fd_file(f)->f_op->flock(fd_file(f), (can_sleep) ? F_SETLKW : F_SETLK, &fl); else error = locks_lock_file_wait(fd_file(f), &fl); locks_release_private(&fl); out_putf: fdput(f); return error; } /** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for * @fl: The lock to test; also used to hold result * * Returns -ERRNO on failure. Indicates presence of conflicting lock by * setting conf->fl_type to something other than F_UNLCK. 
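 *
 * A minimal caller-side sketch (illustrative only; the owner/pid values
 * follow the conventions used for process-owned POSIX locks elsewhere in
 * this file):
 *
 *	struct file_lock fl;
 *
 *	locks_init_lock(&fl);
 *	fl.c.flc_type = F_WRLCK;
 *	fl.c.flc_flags = FL_POSIX;
 *	fl.c.flc_owner = current->files;
 *	fl.c.flc_pid = current->tgid;
 *	fl.c.flc_file = filp;
 *	fl.fl_start = 0;
 *	fl.fl_end = OFFSET_MAX;
 *	error = vfs_test_lock(filp, &fl);
 *	if (!error && fl.c.flc_type != F_UNLCK)
 *		...a conflicting lock was found and copied into fl...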
*/ int vfs_test_lock(struct file *filp, struct file_lock *fl) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_test_lock); /** * locks_translate_pid - translate a file_lock's fl_pid number into a namespace * @fl: The file_lock who's fl_pid should be translated * @ns: The namespace into which the pid should be translated * * Used to translate a fl_pid into a namespace virtual pid number */ static pid_t locks_translate_pid(struct file_lock_core *fl, struct pid_namespace *ns) { pid_t vnr; struct pid *pid; if (fl->flc_flags & FL_OFDLCK) return -1; /* Remote locks report a negative pid value */ if (fl->flc_pid <= 0) return fl->flc_pid; /* * If the flock owner process is dead and its pid has been already * freed, the translation below won't work, but we still want to show * flock owner pid number in init pidns. */ if (ns == &init_pid_ns) return (pid_t) fl->flc_pid; rcu_read_lock(); pid = find_pid_ns(fl->flc_pid, &init_pid_ns); vnr = pid_nr_ns(pid, ns); rcu_read_unlock(); return vnr; } static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current)); #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via * legacy 32bit flock. */ if (fl->fl_start > OFFT_OFFSET_MAX) return -EOVERFLOW; if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) return -EOVERFLOW; #endif flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; flock->l_type = fl->c.flc_type; return 0; } #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current)); flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; flock->l_type = fl->c.flc_type; } #endif /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *fl; int error; fl = locks_alloc_lock(); if (fl == NULL) return -ENOMEM; error = -EINVAL; if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; error = flock_to_posix_lock(filp, fl, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock->l_pid != 0) goto out; fl->c.flc_flags |= FL_OFDLCK; fl->c.flc_owner = filp; } error = vfs_test_lock(filp, fl); if (error) goto out; flock->l_type = fl->c.flc_type; if (fl->c.flc_type != F_UNLCK) { error = posix_lock_to_flock(flock, fl); if (error) goto out; } out: locks_free_lock(fl); return error; } /** * vfs_lock_file - file byte range lock * @filp: The file to apply the lock to * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) * @fl: The lock to be applied * @conf: Place to return a copy of the conflicting lock, if found. * * A caller that doesn't care about the conflicting lock may pass NULL * as the final argument. * * If the filesystem defines a private ->lock() method, then @conf will * be left unchanged; so a caller that cares should initialize it to * some acceptable default. 
* * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX * locks, the ->lock() interface may return asynchronously, before the lock has * been granted or denied by the underlying filesystem, if (and only if) * lm_grant is set. Additionally EXPORT_OP_ASYNC_LOCK in export_operations * flags need to be set. * * Callers expecting ->lock() to return asynchronously will only use F_SETLK, * not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a * blocking lock. When ->lock() does return asynchronously, it must return * FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes. * If the request is for non-blocking lock the file system should return * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); } EXPORT_SYMBOL_GPL(vfs_lock_file); static int do_lock_file_wait(struct file *filp, unsigned int cmd, struct file_lock *fl) { int error; error = security_file_lock(filp, fl->c.flc_type); if (error) return error; for (;;) { error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */ static int check_fmode_for_setlk(struct file_lock *fl) { switch (fl->c.flc_type) { case F_RDLCK: if (!(fl->c.flc_file->f_mode & FMODE_READ)) return -EBADF; break; case F_WRLCK: if (!(fl->c.flc_file->f_mode & FMODE_WRITE)) return -EBADF; } return 0; } /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *file_lock = locks_alloc_lock(); struct inode *inode = file_inode(filp); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; error = flock_to_posix_lock(filp, file_lock, flock); if (error) goto out; error = check_fmode_for_setlk(file_lock); if (error) goto out; /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLK; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; break; case F_OFD_SETLKW: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLKW; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; fallthrough; case F_SETLKW: file_lock->c.flc_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); /* * Detect close/fcntl races and recover by zapping all POSIX locks * associated with this file and our files_struct, just like on * filp_flush(). 
There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && !(file_lock->c.flc_flags & FL_OFDLCK)) { struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ spin_lock(&files->file_lock); f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { locks_remove_posix(filp, files); error = -EBADF; } } out: trace_fcntl_setlk(inode, file_lock, error); locks_free_lock(file_lock); return error; } #if BITS_PER_LONG == 32 /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock *fl; int error; fl = locks_alloc_lock(); if (fl == NULL) return -ENOMEM; error = -EINVAL; if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; error = flock64_to_posix_lock(filp, fl, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock->l_pid != 0) goto out; fl->c.flc_flags |= FL_OFDLCK; fl->c.flc_owner = filp; } error = vfs_test_lock(filp, fl); if (error) goto out; flock->l_type = fl->c.flc_type; if (fl->c.flc_type != F_UNLCK) posix_lock_to_flock64(flock, fl); out: locks_free_lock(fl); return error; } /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock *file_lock = locks_alloc_lock(); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; error = flock64_to_posix_lock(filp, file_lock, flock); if (error) goto out; error = check_fmode_for_setlk(file_lock); if (error) goto out; /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLK64; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; break; case F_OFD_SETLKW: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLKW64; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; fallthrough; case F_SETLKW64: file_lock->c.flc_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); /* * Detect close/fcntl races and recover by zapping all POSIX locks * associated with this file and our files_struct, just like on * filp_flush(). There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && !(file_lock->c.flc_flags & FL_OFDLCK)) { struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ spin_lock(&files->file_lock); f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { locks_remove_posix(filp, files); error = -EBADF; } } out: locks_free_lock(file_lock); return error; } #endif /* BITS_PER_LONG == 32 */ /* * This function is called when the file is being removed * from the task's fd array. POSIX locks belonging to this task * are deleted at this time. 
*/ void locks_remove_posix(struct file *filp, fl_owner_t owner) { int error; struct inode *inode = file_inode(filp); struct file_lock lock; struct file_lock_context *ctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; locks_init_lock(&lock); lock.c.flc_type = F_UNLCK; lock.c.flc_flags = FL_POSIX | FL_CLOSE; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; lock.c.flc_owner = owner; lock.c.flc_pid = current->tgid; lock.c.flc_file = filp; lock.fl_ops = NULL; lock.fl_lmops = NULL; error = vfs_lock_file(filp, F_SETLK, &lock, NULL); if (lock.fl_ops && lock.fl_ops->fl_release_private) lock.fl_ops->fl_release_private(&lock); trace_locks_remove_posix(inode, &lock, error); } EXPORT_SYMBOL(locks_remove_posix); /* The i_flctx must be valid when calling into here */ static void locks_remove_flock(struct file *filp, struct file_lock_context *flctx) { struct file_lock fl; struct inode *inode = file_inode(filp); if (list_empty(&flctx->flc_flock)) return; flock_make_lock(filp, &fl, F_UNLCK); fl.c.flc_flags |= FL_CLOSE; if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); } /* The i_flctx must be valid when calling into here */ static void locks_remove_lease(struct file *filp, struct file_lock_context *ctx) { struct file_lease *fl, *tmp; LIST_HEAD(dispose); if (list_empty(&ctx->flc_lease)) return; percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) if (filp == fl->c.flc_file) lease_modify(fl, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); } /* * This function is called on the last close of an open file. */ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; ctx = locks_inode_context(file_inode(filp)); if (!ctx) return; /* remove any OFD locks */ locks_remove_posix(filp, filp); /* remove flock locks */ locks_remove_flock(filp, ctx); /* remove any leases */ locks_remove_lease(filp, ctx); spin_lock(&ctx->flc_lock); locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX"); locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK"); locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE"); spin_unlock(&ctx->flc_lock); } /** * vfs_cancel_lock - file byte range unblock lock * @filp: The file to apply the unblock to * @fl: The lock to be unblocked * * Used by lock managers to cancel blocked requests */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_cancel_lock); /** * vfs_inode_has_locks - are any file locks held on @inode? * @inode: inode to check for locks * * Return true if there are any FL_POSIX or FL_FLOCK locks currently * set on @inode. 
*/ bool vfs_inode_has_locks(struct inode *inode) { struct file_lock_context *ctx; bool ret; ctx = locks_inode_context(inode); if (!ctx) return false; spin_lock(&ctx->flc_lock); ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); spin_unlock(&ctx->flc_lock); return ret; } EXPORT_SYMBOL_GPL(vfs_inode_has_locks); #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <linux/seq_file.h> struct locks_iterator { int li_cpu; loff_t li_pos; }; static void lock_get_status(struct seq_file *f, struct file_lock_core *flc, loff_t id, char *pfx, int repeat) { struct inode *inode = NULL; unsigned int pid; struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb); int type = flc->flc_type; struct file_lock *fl = file_lock(flc); pid = locks_translate_pid(flc, proc_pidns); /* * If lock owner is dead (and pid is freed) or not visible in current * pidns, zero is shown as a pid value. Check lock info from * init_pid_ns to get saved lock pid value. */ if (flc->flc_file != NULL) inode = file_inode(flc->flc_file); seq_printf(f, "%lld: ", id); if (repeat) seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx); if (flc->flc_flags & FL_POSIX) { if (flc->flc_flags & FL_ACCESS) seq_puts(f, "ACCESS"); else if (flc->flc_flags & FL_OFDLCK) seq_puts(f, "OFDLCK"); else seq_puts(f, "POSIX "); seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : "ADVISORY "); } else if (flc->flc_flags & FL_FLOCK) { seq_puts(f, "FLOCK ADVISORY "); } else if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) { struct file_lease *lease = file_lease(flc); type = target_leasetype(lease); if (flc->flc_flags & FL_DELEG) seq_puts(f, "DELEG "); else seq_puts(f, "LEASE "); if (lease_breaking(lease)) seq_puts(f, "BREAKING "); else if (flc->flc_file) seq_puts(f, "ACTIVE "); else seq_puts(f, "BREAKER "); } else { seq_puts(f, "UNKNOWN UNKNOWN "); } seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" : (type == F_RDLCK) ? 
"READ" : "UNLCK"); if (inode) { /* userspace relies on this representation of dev_t */ seq_printf(f, "%d %02x:%02x:%lu ", pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { seq_printf(f, "%d <none>:0 ", pid); } if (flc->flc_flags & FL_POSIX) { if (fl->fl_end == OFFSET_MAX) seq_printf(f, "%Ld EOF\n", fl->fl_start); else seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { seq_puts(f, "0 EOF\n"); } } static struct file_lock_core *get_next_blocked_member(struct file_lock_core *node) { struct file_lock_core *tmp; /* NULL node or root node */ if (node == NULL || node->flc_blocker == NULL) return NULL; /* Next member in the linked list could be itself */ tmp = list_next_entry(node, flc_blocked_member); if (list_entry_is_head(tmp, &node->flc_blocker->flc_blocked_requests, flc_blocked_member) || tmp == node) { return NULL; } return tmp; } static int locks_show(struct seq_file *f, void *v) { struct locks_iterator *iter = f->private; struct file_lock_core *cur, *tmp; struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb); int level = 0; cur = hlist_entry(v, struct file_lock_core, flc_link); if (locks_translate_pid(cur, proc_pidns) == 0) return 0; /* View this crossed linked list as a binary tree, the first member of flc_blocked_requests * is the left child of current node, the next silibing in flc_blocked_member is the * right child, we can alse get the parent of current node from flc_blocker, so this * question becomes traversal of a binary tree */ while (cur != NULL) { if (level) lock_get_status(f, cur, iter->li_pos, "-> ", level); else lock_get_status(f, cur, iter->li_pos, "", level); if (!list_empty(&cur->flc_blocked_requests)) { /* Turn left */ cur = list_first_entry_or_null(&cur->flc_blocked_requests, struct file_lock_core, flc_blocked_member); level++; } else { /* Turn right */ tmp = get_next_blocked_member(cur); /* Fall back to parent node */ while (tmp == NULL && cur->flc_blocker != NULL) { cur = cur->flc_blocker; level--; tmp = get_next_blocked_member(cur); } cur = tmp; } } return 0; } static void __show_fd_locks(struct seq_file *f, struct list_head *head, int *id, struct file *filp, struct files_struct *files) { struct file_lock_core *fl; list_for_each_entry(fl, head, flc_list) { if (filp != fl->flc_file) continue; if (fl->flc_owner != files && fl->flc_owner != filp) continue; (*id)++; seq_puts(f, "lock:\t"); lock_get_status(f, fl, *id, "", 0); } } void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) { struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int id = 0; ctx = locks_inode_context(inode); if (!ctx) return; spin_lock(&ctx->flc_lock); __show_fd_locks(f, &ctx->flc_flock, &id, filp, files); __show_fd_locks(f, &ctx->flc_posix, &id, filp, files); __show_fd_locks(f, &ctx->flc_lease, &id, filp, files); spin_unlock(&ctx->flc_lock); } static void *locks_start(struct seq_file *f, loff_t *pos) __acquires(&blocked_lock_lock) { struct locks_iterator *iter = f->private; iter->li_pos = *pos + 1; percpu_down_write(&file_rwsem); spin_lock(&blocked_lock_lock); return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos); } static void *locks_next(struct seq_file *f, void *v, loff_t *pos) { struct locks_iterator *iter = f->private; ++iter->li_pos; return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos); } static void locks_stop(struct seq_file *f, void *v) __releases(&blocked_lock_lock) { spin_unlock(&blocked_lock_lock); percpu_up_write(&file_rwsem); } 
static const struct seq_operations locks_seq_operations = {
	.start = locks_start,
	.next = locks_next,
	.stop = locks_stop,
	.show = locks_show,
};

static int __init proc_locks_init(void)
{
	proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
			sizeof(struct locks_iterator), NULL);
	return 0;
}
fs_initcall(proc_locks_init);
#endif

static int __init filelock_init(void)
{
	int i;

	flctx_cache = kmem_cache_create("file_lock_ctx",
			sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);

	filelock_cache = kmem_cache_create("file_lock_cache",
			sizeof(struct file_lock), 0, SLAB_PANIC, NULL);

	filelease_cache = kmem_cache_create("file_lease_cache",
			sizeof(struct file_lease), 0, SLAB_PANIC, NULL);

	for_each_possible_cpu(i) {
		struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);

		spin_lock_init(&fll->lock);
		INIT_HLIST_HEAD(&fll->hlist);
	}

	lease_notifier_chain_init();
	return 0;
}
core_initcall(filelock_init);
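/*
 * Minimal userspace sketch of the fcntl_setlk()/fcntl_getlk() paths above,
 * using an open-file-description (OFD) lock. The path name and byte range
 * are arbitrary placeholders; note that fcntl_setlk() above rejects the
 * F_OFD_* commands with -EINVAL unless l_pid is zero.
 */
#if 0	/* illustration only, not part of the kernel build */
#define _GNU_SOURCE		/* for F_OFD_SETLK/F_OFD_SETLKW in glibc */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = {
		.l_type   = F_WRLCK,	/* exclusive byte-range lock */
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,		/* 0 means "until EOF" */
		.l_pid    = 0,		/* must be 0 for the F_OFD_* commands */
	};
	int fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0600);

	if (fd < 0)
		return 1;

	/* F_OFD_SETLKW sleeps until any conflicting lock is released */
	if (fcntl(fd, F_OFD_SETLKW, &fl) == -1) {
		perror("fcntl");
		return 1;
	}

	/* ... critical section ... */

	fl.l_type = F_UNLCK;
	fcntl(fd, F_OFD_SETLK, &fl);	/* also dropped when the description is closed */
	close(fd);
	return 0;
}
#endif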
/*
 * Copyright (c) 2004-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2012 Qualcomm Atheros, Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ #include "core.h" #include "hif-ops.h" #include "target.h" #include "debug.h" int ath6kl_bmi_done(struct ath6kl *ar) { int ret; u32 cid = BMI_DONE; if (ar->bmi.done_sent) { ath6kl_dbg(ATH6KL_DBG_BMI, "bmi done skipped\n"); return 0; } ar->bmi.done_sent = true; ret = ath6kl_hif_bmi_write(ar, (u8 *)&cid, sizeof(cid)); if (ret) { ath6kl_err("Unable to send bmi done: %d\n", ret); return ret; } return 0; } int ath6kl_bmi_get_target_info(struct ath6kl *ar, struct ath6kl_bmi_target_info *targ_info) { int ret; u32 cid = BMI_GET_TARGET_INFO; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } ret = ath6kl_hif_bmi_write(ar, (u8 *)&cid, sizeof(cid)); if (ret) { ath6kl_err("Unable to send get target info: %d\n", ret); return ret; } if (ar->hif_type == ATH6KL_HIF_TYPE_USB) { ret = ath6kl_hif_bmi_read(ar, (u8 *)targ_info, sizeof(*targ_info)); } else { ret = ath6kl_hif_bmi_read(ar, (u8 *)&targ_info->version, sizeof(targ_info->version)); } if (ret) { ath6kl_err("Unable to recv target info: %d\n", ret); return ret; } if (le32_to_cpu(targ_info->version) == TARGET_VERSION_SENTINAL) { /* Determine how many bytes are in the Target's targ_info */ ret = ath6kl_hif_bmi_read(ar, (u8 *)&targ_info->byte_count, sizeof(targ_info->byte_count)); if (ret) { ath6kl_err("unable to read target info byte count: %d\n", ret); return ret; } /* * The target's targ_info doesn't match the host's targ_info. * We need to do some backwards compatibility to make this work. */ if (le32_to_cpu(targ_info->byte_count) != sizeof(*targ_info)) { WARN_ON(1); return -EINVAL; } /* Read the remainder of the targ_info */ ret = ath6kl_hif_bmi_read(ar, ((u8 *)targ_info) + sizeof(targ_info->byte_count), sizeof(*targ_info) - sizeof(targ_info->byte_count)); if (ret) { ath6kl_err("Unable to read target info (%d bytes): %d\n", targ_info->byte_count, ret); return ret; } } ath6kl_dbg(ATH6KL_DBG_BMI, "target info (ver: 0x%x type: 0x%x)\n", targ_info->version, targ_info->type); return 0; } int ath6kl_bmi_read(struct ath6kl *ar, u32 addr, u8 *buf, u32 len) { u32 cid = BMI_READ_MEMORY; int ret; u32 offset; u32 len_remain, rx_len; u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = ar->bmi.max_data_size + sizeof(cid) + sizeof(addr) + sizeof(len); if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi read memory: device: addr: 0x%x, len: %d\n", addr, len); len_remain = len; while (len_remain) { rx_len = (len_remain < ar->bmi.max_data_size) ? 
len_remain : ar->bmi.max_data_size; offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); memcpy(&(ar->bmi.cmd_buf[offset]), &rx_len, sizeof(rx_len)); offset += sizeof(len); ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } ret = ath6kl_hif_bmi_read(ar, ar->bmi.cmd_buf, rx_len); if (ret) { ath6kl_err("Unable to read from the device: %d\n", ret); return ret; } memcpy(&buf[len - len_remain], ar->bmi.cmd_buf, rx_len); len_remain -= rx_len; addr += rx_len; } return 0; } int ath6kl_bmi_write(struct ath6kl *ar, u32 addr, u8 *buf, u32 len) { u32 cid = BMI_WRITE_MEMORY; int ret; u32 offset; u32 len_remain, tx_len; const u32 header = sizeof(cid) + sizeof(addr) + sizeof(len); u8 aligned_buf[400]; u8 *src; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } if ((ar->bmi.max_data_size + header) > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } if (WARN_ON(ar->bmi.max_data_size > sizeof(aligned_buf))) return -E2BIG; memset(ar->bmi.cmd_buf, 0, ar->bmi.max_data_size + header); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi write memory: addr: 0x%x, len: %d\n", addr, len); len_remain = len; while (len_remain) { src = &buf[len - len_remain]; if (len_remain < (ar->bmi.max_data_size - header)) { if (len_remain & 3) { /* align it with 4 bytes */ len_remain = len_remain + (4 - (len_remain & 3)); memcpy(aligned_buf, src, len_remain); src = aligned_buf; } tx_len = len_remain; } else { tx_len = (ar->bmi.max_data_size - header); } offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); memcpy(&(ar->bmi.cmd_buf[offset]), &tx_len, sizeof(tx_len)); offset += sizeof(tx_len); memcpy(&(ar->bmi.cmd_buf[offset]), src, tx_len); offset += tx_len; ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } len_remain -= tx_len; addr += tx_len; } return 0; } int ath6kl_bmi_execute(struct ath6kl *ar, u32 addr, u32 *param) { u32 cid = BMI_EXECUTE; int ret; u32 offset; u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = sizeof(cid) + sizeof(addr) + sizeof(*param); if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi execute: addr: 0x%x, param: %d)\n", addr, *param); offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); memcpy(&(ar->bmi.cmd_buf[offset]), param, sizeof(*param)); offset += sizeof(*param); ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } ret = ath6kl_hif_bmi_read(ar, ar->bmi.cmd_buf, sizeof(*param)); if (ret) { ath6kl_err("Unable to read from the device: %d\n", ret); return ret; } memcpy(param, ar->bmi.cmd_buf, sizeof(*param)); return 0; } int ath6kl_bmi_set_app_start(struct ath6kl *ar, u32 addr) { u32 cid = BMI_SET_APP_START; int ret; u32 offset; u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = sizeof(cid) + sizeof(addr); if (size > ar->bmi.max_cmd_size) { 
WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi set app start: addr: 0x%x\n", addr); offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } return 0; } int ath6kl_bmi_reg_read(struct ath6kl *ar, u32 addr, u32 *param) { u32 cid = BMI_READ_SOC_REGISTER; int ret; u32 offset; u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = sizeof(cid) + sizeof(addr); if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi read SOC reg: addr: 0x%x\n", addr); offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } ret = ath6kl_hif_bmi_read(ar, ar->bmi.cmd_buf, sizeof(*param)); if (ret) { ath6kl_err("Unable to read from the device: %d\n", ret); return ret; } memcpy(param, ar->bmi.cmd_buf, sizeof(*param)); return 0; } int ath6kl_bmi_reg_write(struct ath6kl *ar, u32 addr, u32 param) { u32 cid = BMI_WRITE_SOC_REGISTER; int ret; u32 offset; u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = sizeof(cid) + sizeof(addr) + sizeof(param); if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi write SOC reg: addr: 0x%x, param: %d\n", addr, param); offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); memcpy(&(ar->bmi.cmd_buf[offset]), &param, sizeof(param)); offset += sizeof(param); ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } return 0; } int ath6kl_bmi_lz_data(struct ath6kl *ar, u8 *buf, u32 len) { u32 cid = BMI_LZ_DATA; int ret; u32 offset; u32 len_remain, tx_len; const u32 header = sizeof(cid) + sizeof(len); u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = ar->bmi.max_data_size + header; if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi send LZ data: len: %d)\n", len); len_remain = len; while (len_remain) { tx_len = (len_remain < (ar->bmi.max_data_size - header)) ?
len_remain : (ar->bmi.max_data_size - header); offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &tx_len, sizeof(tx_len)); offset += sizeof(tx_len); memcpy(&(ar->bmi.cmd_buf[offset]), &buf[len - len_remain], tx_len); offset += tx_len; ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to write to the device: %d\n", ret); return ret; } len_remain -= tx_len; } return 0; } int ath6kl_bmi_lz_stream_start(struct ath6kl *ar, u32 addr) { u32 cid = BMI_LZ_STREAM_START; int ret; u32 offset; u16 size; if (ar->bmi.done_sent) { ath6kl_err("bmi done sent already, cmd %d disallowed\n", cid); return -EACCES; } size = sizeof(cid) + sizeof(addr); if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; } memset(ar->bmi.cmd_buf, 0, size); ath6kl_dbg(ATH6KL_DBG_BMI, "bmi LZ stream start: addr: 0x%x)\n", addr); offset = 0; memcpy(&(ar->bmi.cmd_buf[offset]), &cid, sizeof(cid)); offset += sizeof(cid); memcpy(&(ar->bmi.cmd_buf[offset]), &addr, sizeof(addr)); offset += sizeof(addr); ret = ath6kl_hif_bmi_write(ar, ar->bmi.cmd_buf, offset); if (ret) { ath6kl_err("Unable to start LZ stream to the device: %d\n", ret); return ret; } return 0; } int ath6kl_bmi_fast_download(struct ath6kl *ar, u32 addr, u8 *buf, u32 len) { int ret; u32 last_word = 0; u32 last_word_offset = len & ~0x3; u32 unaligned_bytes = len & 0x3; ret = ath6kl_bmi_lz_stream_start(ar, addr); if (ret) return ret; if (unaligned_bytes) { /* copy the last word into a zero padded buffer */ memcpy(&last_word, &buf[last_word_offset], unaligned_bytes); } ret = ath6kl_bmi_lz_data(ar, buf, last_word_offset); if (ret) return ret; if (unaligned_bytes) ret = ath6kl_bmi_lz_data(ar, (u8 *)&last_word, 4); if (!ret) { /* Close compressed stream and open a new (fake) one. * This serves mainly to flush Target caches. */ ret = ath6kl_bmi_lz_stream_start(ar, 0x00); } return ret; } void ath6kl_bmi_reset(struct ath6kl *ar) { ar->bmi.done_sent = false; } int ath6kl_bmi_init(struct ath6kl *ar) { if (WARN_ON(ar->bmi.max_data_size == 0)) return -EINVAL; /* cmd + addr + len + data_size */ ar->bmi.max_cmd_size = ar->bmi.max_data_size + (sizeof(u32) * 3); ar->bmi.cmd_buf = kzalloc(ar->bmi.max_cmd_size, GFP_KERNEL); if (!ar->bmi.cmd_buf) return -ENOMEM; return 0; } void ath6kl_bmi_cleanup(struct ath6kl *ar) { kfree(ar->bmi.cmd_buf); ar->bmi.cmd_buf = NULL; } |
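/*
 * A minimal usage sketch, not taken from the driver above: it shows the BMI
 * bring-up order these helpers imply (query the target, push the firmware
 * image, set the application start address, then leave BMI mode). The
 * example_bmi_boot() name and its parameters are hypothetical; the real
 * addresses and image come from the ath6kl init code.
 */
static int example_bmi_boot(struct ath6kl *ar, u8 *fw, u32 fw_len,
			    u32 load_addr, u32 start_addr)
{
	struct ath6kl_bmi_target_info targ_info = {};
	int ret;

	/* query the target; rejected with -EACCES once BMI_DONE has been sent */
	ret = ath6kl_bmi_get_target_info(ar, &targ_info);
	if (ret)
		return ret;

	/* copy the image into target memory in max_data_size-bounded chunks */
	ret = ath6kl_bmi_write(ar, load_addr, fw, fw_len);
	if (ret)
		return ret;

	/* record where the target should start executing after BMI ends */
	ret = ath6kl_bmi_set_app_start(ar, start_addr);
	if (ret)
		return ret;

	/* leave BMI mode; no further BMI commands are accepted */
	return ath6kl_bmi_done(ar);
}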
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/core/dev_addr_lists.c - Functions for handling net device lists * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com> * * This file contains functions for working with unicast, multicast and device * addresses lists. */ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> #include "dev.h" /* * General list handling functions */ static int __hw_addr_insert(struct netdev_hw_addr_list *list, struct netdev_hw_addr *new, int addr_len) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(new->addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&new->type, &ha->type, sizeof(new->type)); parent = *ins_point; if (diff < 0) ins_point = &parent->rb_left; else if (diff > 0) ins_point = &parent->rb_right; else return -EEXIST; } rb_link_node_rcu(&new->node, parent, ins_point); rb_insert_color(&new->node, &list->tree); return 0; } static struct netdev_hw_addr* __hw_addr_create(const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; int alloc_size; alloc_size = sizeof(*ha); if (alloc_size < L1_CACHE_BYTES) alloc_size = L1_CACHE_BYTES; ha = kmalloc(alloc_size, GFP_ATOMIC); if (!ha) return NULL; memcpy(ha->addr, addr, addr_len); ha->type = addr_type; ha->refcount = 1; ha->global_use = global; ha->synced = sync ?
1 : 0; ha->sync_cnt = 0; return ha; } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync, int sync_count, bool exclusive) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; if (addr_len > MAX_ADDR_LEN) return -EINVAL; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); parent = *ins_point; if (diff < 0) { ins_point = &parent->rb_left; } else if (diff > 0) { ins_point = &parent->rb_right; } else { if (exclusive) return -EEXIST; if (global) { /* check if addr is already used as global */ if (ha->global_use) return 0; else ha->global_use = true; } if (sync) { if (ha->synced && sync_count) return -EEXIST; else ha->synced++; } ha->refcount++; return 0; } } ha = __hw_addr_create(addr, addr_len, addr_type, global, sync); if (!ha) return -ENOMEM; rb_link_node(&ha->node, parent, ins_point); rb_insert_color(&ha->node, &list->tree); list_add_tail_rcu(&ha->list, &list->list); list->count++; return 0; } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, 0, false); } static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, struct netdev_hw_addr *ha, bool global, bool sync) { if (global && !ha->global_use) return -ENOENT; if (sync && !ha->synced) return -ENOENT; if (global) ha->global_use = false; if (sync) ha->synced--; if (--ha->refcount) return 0; rb_erase(&ha->node, &list->tree); list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); list->count--; return 0; } static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { struct rb_node *node; node = list->tree.rb_node; while (node) { struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node); int diff = memcmp(addr, ha->addr, addr_len); if (diff == 0 && addr_type) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); if (diff < 0) node = node->rb_left; else if (diff > 0) node = node->rb_right; else return ha; } return NULL; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type); if (!ha) return -ENOENT; return __hw_addr_del_entry(list, ha, global, sync); } static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); } static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, false, true, ha->sync_cnt, false); if (err && err != -EEXIST) return err; if (!err) { ha->sync_cnt++; ha->refcount++; } return 0; } static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, false, true); if (err) return; ha->sync_cnt--; /* address on from list is not marked synced */ __hw_addr_del_entry(from_list, ha, false, false); } static int 
__hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt == ha->refcount) { __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } else { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } } return err; } /* This function only works where there is a strict 1-1 relationship * between the source and destination of the sync. If you ever need to * sync addresses to more than 1 destination, you need to use * __hw_addr_sync_multiple(). */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->sync_cnt) { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } else if (ha->refcount == 1) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } EXPORT_SYMBOL(__hw_addr_sync); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); /** * __hw_addr_sync_dev - Synchronize device's multicast list * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address add/remove * notifications. The unsync function may be NULL in which case * the addresses requiring removal will simply be removed without * any notification to the device. **/ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; int err; /* first go through and flush out any stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt || ha->refcount != 1) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (ha->sync_cnt) continue; err = sync(dev, ha->addr); if (err) return err; ha->sync_cnt++; ha->refcount++; } return 0; } EXPORT_SYMBOL(__hw_addr_sync_dev); /** * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking * into account references * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address or reference on it should be added * @unsync: function to call if address or some reference on it should be removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address or references on it * add/remove notifications. The unsync function may be NULL in which case * the addresses or references on it requiring removal will simply be * removed without any notification to the device. It is the responsibility of * the driver to identify and distribute address or references on it between * internal address tables.
**/ int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *, int), int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; int err, ref_cnt; /* first go through and flush out any unsynced/stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address is not used */ if ((ha->sync_cnt << 1) <= ha->refcount) continue; /* if fails defer unsyncing address */ ref_cnt = ha->refcount - ha->sync_cnt; if (unsync && unsync(dev, ha->addr, ref_cnt)) continue; ha->refcount = (ref_cnt << 1) + 1; ha->sync_cnt = ref_cnt; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync updated/new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address added or reused */ if ((ha->sync_cnt << 1) >= ha->refcount) continue; ref_cnt = ha->refcount - ha->sync_cnt; err = sync(dev, ha->addr, ref_cnt); if (err) return err; ha->refcount = ref_cnt << 1; ha->sync_cnt = ref_cnt; } return 0; } EXPORT_SYMBOL(__hw_addr_ref_sync_dev); /** * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on * it from device * @list: address list to remove synchronized addresses (references on it) from * @dev: device to sync * @unsync: function to call if address and references on it should be removed * * Remove all addresses that were added to the device by * __hw_addr_ref_sync_dev(). This function is intended to be called from the * ndo_stop or ndo_open functions on devices that require explicit address (or * references on it) add/remove notifications. If the unsync function pointer * is NULL then this function can be used to just reset the sync_cnt for the * addresses in the list. **/ void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr, ha->sync_cnt)) continue; ha->refcount -= ha->sync_cnt - 1; ha->sync_cnt = 0; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_ref_unsync_dev); /** * __hw_addr_unsync_dev - Remove synchronized addresses from device * @list: address list to remove synchronized addresses from * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by __hw_addr_sync_dev(). * This function is intended to be called from the ndo_stop or ndo_open * functions on devices that require explicit address add/remove * notifications. If the unsync function pointer is NULL then this function * can be used to just reset the sync_cnt for the addresses in the list. 
**/ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_unsync_dev); static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; list->tree = RB_ROOT; list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); } list->count = 0; } void __hw_addr_init(struct netdev_hw_addr_list *list) { INIT_LIST_HEAD(&list->list); list->count = 0; list->tree = RB_ROOT; } EXPORT_SYMBOL(__hw_addr_init); /* * Device addresses handling functions */ /* Check that netdev->dev_addr is not written to directly as this would * break the rbtree layout. All changes should go thru dev_addr_set() and co. * Remove this check in mid-2024. */ void dev_addr_check(struct net_device *dev) { if (!memcmp(dev->dev_addr, dev->dev_addr_shadow, MAX_ADDR_LEN)) return; netdev_warn(dev, "Current addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr); netdev_warn(dev, "Expected addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr_shadow); netdev_WARN(dev, "Incorrect netdev->dev_addr\n"); } /** * dev_addr_flush - Flush device address list * @dev: device * * Flush device address list and reset ->dev_addr. * * The caller must hold the rtnl_mutex. */ void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ dev_addr_check(dev); __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } /** * dev_addr_init - Init device address list * @dev: device * * Init device address list and create the first element, * used by ->dev_addr. * * The caller must hold the rtnl_mutex. */ int dev_addr_init(struct net_device *dev) { unsigned char addr[MAX_ADDR_LEN]; struct netdev_hw_addr *ha; int err; /* rtnl_mutex must be held here */ __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } return err; } void dev_addr_mod(struct net_device *dev, unsigned int offset, const void *addr, size_t len) { struct netdev_hw_addr *ha; dev_addr_check(dev); ha = container_of(dev->dev_addr, struct netdev_hw_addr, addr[0]); rb_erase(&ha->node, &dev->dev_addrs.tree); memcpy(&ha->addr[offset], addr, len); memcpy(&dev->dev_addr_shadow[offset], addr, len); WARN_ON(__hw_addr_insert(&dev->dev_addrs, ha, dev->addr_len)); } EXPORT_SYMBOL(dev_addr_mod); /** * dev_addr_add - Add a device address * @dev: device * @addr: address to add * @addr_type: address type * * Add a device address to the device or increase the reference count if * it already exists. * * The caller must hold the rtnl_mutex. 
*/ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; ASSERT_RTNL(); err = dev_pre_changeaddr_notify(dev, addr, NULL); if (err) return err; err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_add); /** * dev_addr_del - Release a device address. * @dev: device * @addr: address to delete * @addr_type: address type * * Release reference to a device address and remove it from the device * if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; struct netdev_hw_addr *ha; ASSERT_RTNL(); /* * We can not remove the first address from the list because * dev->dev_addr points to that. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); if (!memcmp(ha->addr, addr, dev->addr_len) && ha->type == addr_type && ha->refcount == 1) return -ENOENT; err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_del); /* * Unicast list handling functions */ /** * dev_uc_add_excl - Add a global secondary unicast address * @dev: device * @addr: address to add */ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add_excl); /** * dev_uc_add - Add a secondary unicast address * @dev: device * @addr: address to add * * Add a secondary unicast address to the device or increase * the reference count if it already exists. */ int dev_uc_add(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add); /** * dev_uc_del - Release secondary unicast address. * @dev: device * @addr: address to delete * * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. */ int dev_uc_del(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_del); /** * dev_uc_sync - Synchronize device's unicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. This function assumes that * addresses will only ever be synced to the @to devices and no other. 
*/ int dev_uc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync); /** * dev_uc_sync_multiple - Synchronize device's unicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have been deleted from the source. The source device * must be locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. It allows for a single source * device to be synced to multiple destination devices. */ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync_multiple); /** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_uc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_uc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two * reasons: * 1) This is always called without any addr_list_lock, so as the * outermost one here, it must be 0. * 2) This is called by some callers after unlinking the upper device, * so the dev->lower_level becomes 1 again. * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or * larger. */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_uc_unsync); /** * dev_uc_flush - Flush unicast addresses * @dev: device * * Flush unicast addresses. */ void dev_uc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_uc_flush); /** * dev_uc_init - Init unicast address list * @dev: device * * Init unicast address list. 
*/ void dev_uc_init(struct net_device *dev) { __hw_addr_init(&dev->uc); } EXPORT_SYMBOL(dev_uc_init); /* * Multicast list handling functions */ /** * dev_mc_add_excl - Add a global secondary multicast address * @dev: device * @addr: address to add */ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_mc_add_excl); static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false, 0, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_add - Add a multicast address * @dev: device * @addr: address to add * * Add a multicast address to the device or increase * the reference count if it already exists. */ int dev_mc_add(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, false); } EXPORT_SYMBOL(dev_mc_add); /** * dev_mc_add_global - Add a global multicast address * @dev: device * @addr: address to add * * Add a global multicast address to the device. */ int dev_mc_add_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, true); } EXPORT_SYMBOL(dev_mc_add_global); static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_del - Delete a multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, false); } EXPORT_SYMBOL(dev_mc_del); /** * dev_mc_del_global - Delete a global multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, true); } EXPORT_SYMBOL(dev_mc_del_global); /** * dev_mc_sync - Synchronize device's multicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync); /** * dev_mc_sync_multiple - Synchronize device's multicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. 
The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. It allows for a single * source device to be synced to multiple destination devices. */ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync_multiple); /** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_mc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); /** * dev_mc_flush - Flush multicast addresses * @dev: device * * Flush multicast addresses. */ void dev_mc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->mc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_mc_flush); /** * dev_mc_init - Init multicast address list * @dev: device * * Init multicast address list. */ void dev_mc_init(struct net_device *dev) { __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); |
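/*
 * A minimal sketch, not part of dev_addr_lists.c: how a driver-side
 * ndo_set_rx_mode handler is expected to consume __hw_addr_sync_dev().
 * The example_* names are hypothetical; a real driver would program its
 * hardware address filter in the two callbacks. ndo_set_rx_mode is
 * normally invoked with the device's address list lock already held,
 * so the sketch takes no extra locking.
 */
static int example_sync_uc(struct net_device *dev, const unsigned char *addr)
{
	/* assumption: add one unicast address to the hardware filter */
	return 0;
}

static int example_unsync_uc(struct net_device *dev, const unsigned char *addr)
{
	/* assumption: drop one unicast address from the hardware filter */
	return 0;
}

static void example_set_rx_mode(struct net_device *dev)
{
	/* sync new entries, unsync entries whose last user went away */
	__hw_addr_sync_dev(&dev->uc, dev, example_sync_uc, example_unsync_uc);
}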
// SPDX-License-Identifier: GPL-2.0-only /* * This is a module which is used for logging packets to userspace via * nfnetlink. * * (C) 2005 by Harald Welte <laforge@netfilter.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * Based on the old ipv4-only ipt_ULOG.c: * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/netfilter_bridge.h> #include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/spinlock.h> #include <linux/sysctl.h> #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/list.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> #include <net/netns/generic.h> #include <linux/atomic.h> #include <linux/refcount.h> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif #define NFULNL_COPY_DISABLED 0xff #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ /* max packet size is limited by 16-bit struct nfattr nfa_len field */ #define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...)
do { if (net_ratelimit()) \ printk(x, ## args); } while (0); struct nfulnl_instance { struct hlist_node hlist; /* global list of instances */ spinlock_t lock; refcount_t use; /* use count */ unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct net *net; netns_tracker ns_tracker; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ u32 peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ unsigned int nlbufsiz; /* netlink buffer allocation size */ unsigned int qthreshold; /* threshold of the queue */ u_int32_t copy_range; u_int32_t seq; /* instance-local sequential counter */ u_int16_t group_num; /* number of this queue */ u_int16_t flags; u_int8_t copy_mode; struct rcu_head rcu; }; #define INSTANCE_BUCKETS 16 static unsigned int nfnl_log_net_id __read_mostly; struct nfnl_log_net { spinlock_t instances_lock; struct hlist_head instance_table[INSTANCE_BUCKETS]; atomic_t global_seq; }; static struct nfnl_log_net *nfnl_log_pernet(struct net *net) { return net_generic(net, nfnl_log_net_id); } static inline u_int8_t instance_hashfn(u_int16_t group_num) { return ((group_num & 0xff) % INSTANCE_BUCKETS); } static struct nfulnl_instance * __instance_lookup(const struct nfnl_log_net *log, u16 group_num) { const struct hlist_head *head; struct nfulnl_instance *inst; head = &log->instance_table[instance_hashfn(group_num)]; hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; } return NULL; } static inline void instance_get(struct nfulnl_instance *inst) { refcount_inc(&inst->use); } static struct nfulnl_instance * instance_lookup_get_rcu(const struct nfnl_log_net *log, u16 group_num) { struct nfulnl_instance *inst; inst = __instance_lookup(log, group_num); if (inst && !refcount_inc_not_zero(&inst->use)) inst = NULL; return inst; } static struct nfulnl_instance * instance_lookup_get(const struct nfnl_log_net *log, u16 group_num) { struct nfulnl_instance *inst; rcu_read_lock(); inst = instance_lookup_get_rcu(log, group_num); rcu_read_unlock(); return inst; } static void nfulnl_instance_free_rcu(struct rcu_head *head) { struct nfulnl_instance *inst = container_of(head, struct nfulnl_instance, rcu); put_net_track(inst->net, &inst->ns_tracker); kfree(inst); module_put(THIS_MODULE); } static void instance_put(struct nfulnl_instance *inst) { if (inst && refcount_dec_and_test(&inst->use)) call_rcu(&inst->rcu, nfulnl_instance_free_rcu); } static void nfulnl_timer(struct timer_list *t); static struct nfulnl_instance * instance_create(struct net *net, u_int16_t group_num, u32 portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; struct nfnl_log_net *log = nfnl_log_pernet(net); int err; spin_lock_bh(&log->instances_lock); if (__instance_lookup(log, group_num)) { err = -EEXIST; goto out_unlock; } inst = kzalloc(sizeof(*inst), GFP_ATOMIC); if (!inst) { err = -ENOMEM; goto out_unlock; } if (!try_module_get(THIS_MODULE)) { kfree(inst); err = -EAGAIN; goto out_unlock; } INIT_HLIST_NODE(&inst->hlist); spin_lock_init(&inst->lock); /* needs to be two, since we _put() after creation */ refcount_set(&inst->use, 2); timer_setup(&inst->timer, nfulnl_timer, 0); inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC); inst->peer_user_ns = user_ns; inst->peer_portid = portid; inst->group_num = group_num; inst->qthreshold = NFULNL_QTHRESH_DEFAULT; inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT; 
inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT; inst->copy_mode = NFULNL_COPY_PACKET; inst->copy_range = NFULNL_COPY_RANGE_MAX; hlist_add_head_rcu(&inst->hlist, &log->instance_table[instance_hashfn(group_num)]); spin_unlock_bh(&log->instances_lock); return inst; out_unlock: spin_unlock_bh(&log->instances_lock); return ERR_PTR(err); } static void __nfulnl_flush(struct nfulnl_instance *inst); /* called with BH disabled */ static void __instance_destroy(struct nfulnl_instance *inst) { /* first pull it out of the global list */ hlist_del_rcu(&inst->hlist); /* then flush all pending packets from skb */ spin_lock(&inst->lock); /* lockless readers wont be able to use us */ inst->copy_mode = NFULNL_COPY_DISABLED; if (inst->skb) __nfulnl_flush(inst); spin_unlock(&inst->lock); /* and finally put the refcount */ instance_put(inst); } static inline void instance_destroy(struct nfnl_log_net *log, struct nfulnl_instance *inst) { spin_lock_bh(&log->instances_lock); __instance_destroy(inst); spin_unlock_bh(&log->instances_lock); } static int nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, unsigned int range) { int status = 0; spin_lock_bh(&inst->lock); switch (mode) { case NFULNL_COPY_NONE: case NFULNL_COPY_META: inst->copy_mode = mode; inst->copy_range = 0; break; case NFULNL_COPY_PACKET: inst->copy_mode = mode; if (range == 0) range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; default: status = -EINVAL; break; } spin_unlock_bh(&inst->lock); return status; } static int nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) { int status; spin_lock_bh(&inst->lock); if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT) status = -ERANGE; else if (nlbufsiz > 131072) status = -ERANGE; else { inst->nlbufsiz = nlbufsiz; status = 0; } spin_unlock_bh(&inst->lock); return status; } static void nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) { spin_lock_bh(&inst->lock); inst->flushtimeout = timeout; spin_unlock_bh(&inst->lock); } static void nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) { spin_lock_bh(&inst->lock); inst->qthreshold = qthresh; spin_unlock_bh(&inst->lock); } static int nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) { spin_lock_bh(&inst->lock); inst->flags = flags; spin_unlock_bh(&inst->lock); return 0; } static struct sk_buff * nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; /* alloc skb which should be big enough for a whole multipart * message. 
WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ skb = alloc_skb(pkt_size, GFP_ATOMIC); } } return skb; } static void __nfulnl_send(struct nfulnl_instance *inst) { if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", inst->skb->len, skb_tailroom(inst->skb))) { kfree_skb(inst->skb); goto out; } } nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid); out: inst->qlen = 0; inst->skb = NULL; } static void __nfulnl_flush(struct nfulnl_instance *inst) { /* timer holds a reference */ if (del_timer(&inst->timer)) instance_put(inst); if (inst->skb) __nfulnl_send(inst); } static void nfulnl_timer(struct timer_list *t) { struct nfulnl_instance *inst = from_timer(inst, t, timer); spin_lock_bh(&inst->lock); if (inst->skb) __nfulnl_send(inst); spin_unlock_bh(&inst->lock); instance_put(inst); } static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) { u32 size = 0; if (!skb_mac_header_was_set(skb)) return 0; if (skb_vlan_tag_present(skb)) { size += nla_total_size(0); /* nested */ size += nla_total_size(sizeof(u16)); /* id */ size += nla_total_size(sizeof(u16)); /* tag */ } if (skb->network_header > skb->mac_header) size += nla_total_size(skb->network_header - skb->mac_header); return size; } static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb) { if (!skb_mac_header_was_set(skb)) return 0; if (skb_vlan_tag_present(skb)) { struct nlattr *nest; nest = nla_nest_start(inst->skb, NFULA_VLAN); if (!nest) goto nla_put_failure; if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) || nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto)) goto nla_put_failure; nla_nest_end(inst->skb, nest); } if (skb->mac_header < skb->network_header) { int len = (int)(skb->network_header - skb->mac_header); if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb))) goto nla_put_failure; } return 0; nla_put_failure: return -1; } /* This is an inline function, we don't really care about a long * list of arguments */ static inline int __build_packet_message(struct nfnl_log_net *log, struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, u_int8_t pf, unsigned int hooknum, const struct net_device *indev, const struct net_device *outdev, const char *prefix, unsigned int plen, const struct nfnl_ct_hook *nfnl_ct, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; const unsigned char *hwhdrp; nlh = nfnl_msg_put(inst->skb, 0, 0, nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), 0, pf, NFNETLINK_V0, htons(inst->group_num)); if (!nlh) return -1; memset(&pmsg, 0, sizeof(pmsg)); pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg)) goto nla_put_failure; if (prefix && nla_put(inst->skb, NFULA_PREFIX, plen, prefix)) goto nla_put_failure; if (indev) { #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, 
htonl(indev->ifindex)) || /* this is the bridge group "brX" */ /* rcu_read_lock()ed by nf_hook_thresh or * nf_log_packet. */ nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; } else { int physinif; /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; physinif = nf_bridge_get_physinif(skb); if (physinif && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, htonl(physinif))) goto nla_put_failure; } #endif } if (outdev) { #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)) || /* this is the bridge group "brX" */ /* rcu_read_lock()ed by nf_hook_thresh or * nf_log_packet. */ nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; } else { struct net_device *physoutdev; /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; physoutdev = nf_bridge_get_physoutdev(skb); if (physoutdev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(physoutdev->ifindex))) goto nla_put_failure; } #endif } if (skb->mark && nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark))) goto nla_put_failure; if (indev && skb->dev && skb_mac_header_was_set(skb) && skb_mac_header_len(skb) != 0) { struct nfulnl_msg_packet_hw phw; int len; memset(&phw, 0, sizeof(phw)); len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { phw.hw_addrlen = htons(len); if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw)) goto nla_put_failure; } } if (indev && skb_mac_header_was_set(skb)) { if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, htons(skb->dev->hard_header_len))) goto nla_put_failure; hwhdrp = skb_mac_header(skb); if (skb->dev->type == ARPHRD_SIT) hwhdrp -= ETH_HLEN; if (hwhdrp >= skb->head && nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, hwhdrp)) goto nla_put_failure; } if (hooknum <= NF_INET_FORWARD) { struct timespec64 kts = ktime_to_timespec64(skb_tstamp_cond(skb, true)); struct nfulnl_msg_packet_timestamp ts; ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts)) goto nla_put_failure; } /* UID */ sk = skb->sk; if (sk && sk_fullsock(sk)) { read_lock_bh(&sk->sk_callback_lock); if (sk->sk_socket && sk->sk_socket->file) { struct file *file = sk->sk_socket->file; const struct cred *cred = file->f_cred; struct user_namespace *user_ns = inst->peer_user_ns; __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid)); __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid)); read_unlock_bh(&sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) goto nla_put_failure; } else read_unlock_bh(&sk->sk_callback_lock); } /* local sequence number */ if ((inst->flags & NFULNL_CFG_F_SEQ) && nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++))) goto nla_put_failure; /* global sequence number */ if ((inst->flags 
& NFULNL_CFG_F_SEQ_GLOBAL) && nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, htonl(atomic_inc_return(&log->global_seq)))) goto nla_put_failure; if (ct && nfnl_ct->build(inst->skb, ct, ctinfo, NFULA_CT, NFULA_CT_INFO) < 0) goto nla_put_failure; if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) && nfulnl_put_bridge(inst, skb) < 0) goto nla_put_failure; if (data_len) { struct nlattr *nla; int size = nla_attr_size(data_len); if (skb_tailroom(inst->skb) < nla_total_size(data_len)) goto nla_put_failure; nla = skb_put(inst->skb, nla_total_size(data_len)); nla->nla_type = NFULA_PAYLOAD; nla->nla_len = size; if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } nlh->nlmsg_len = inst->skb->tail - old_tail; return 0; nla_put_failure: PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); return -1; } static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_ULOG, .u = { .ulog = { .copy_len = 0xffff, .group = 0, .qthreshold = 1, }, }, }; /* log handler for internal netfilter logging api */ static void nfulnl_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li_user, const char *prefix) { size_t size; unsigned int data_len; struct nfulnl_instance *inst; const struct nf_loginfo *li; unsigned int qthreshold; unsigned int plen = 0; struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; enum ip_conntrack_info ctinfo = 0; struct nf_conn *ct = NULL; if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; else li = &default_loginfo; inst = instance_lookup_get_rcu(log, li->u.ulog.group); if (!inst) return; if (prefix) plen = strlen(prefix) + 1; /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... 
*/ size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(u_int32_t)) /* uid */ + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + nla_total_size(sizeof(u_int16_t)); /* hwlen */ } spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) size += nla_total_size(sizeof(u_int32_t)); if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) size += nla_total_size(sizeof(u_int32_t)); #if IS_ENABLED(CONFIG_NF_CONNTRACK) if (inst->flags & NFULNL_CFG_F_CONNTRACK) { nfnl_ct = rcu_dereference(nfnl_ct_hook); if (nfnl_ct != NULL) { ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) size += nfnl_ct->build_size(ct); } } #endif if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) size += nfulnl_get_bridge_size(skb); qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ if (li->u.ulog.qthreshold) if (qthreshold > li->u.ulog.qthreshold) qthreshold = li->u.ulog.qthreshold; switch (inst->copy_mode) { case NFULNL_COPY_META: case NFULNL_COPY_NONE: data_len = 0; break; case NFULNL_COPY_PACKET: data_len = inst->copy_range; if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) && (li->u.ulog.copy_len < data_len)) data_len = li->u.ulog.copy_len; if (data_len > skb->len) data_len = skb->len; size += nla_total_size(data_len); break; case NFULNL_COPY_DISABLED: default: goto unlock_and_release; } if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. 
*/ __nfulnl_flush(inst); } if (!inst->skb) { inst->skb = nfulnl_alloc_skb(net, inst->peer_portid, inst->nlbufsiz, size); if (!inst->skb) goto alloc_failure; } inst->qlen++; __build_packet_message(log, inst, skb, data_len, pf, hooknum, in, out, prefix, plen, nfnl_ct, ct, ctinfo); if (inst->qlen >= qthreshold) __nfulnl_flush(inst); /* timer_pending always called within inst->lock, so there * is no chance of a race here */ else if (!timer_pending(&inst->timer)) { instance_get(inst); inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100); add_timer(&inst->timer); } unlock_and_release: spin_unlock_bh(&inst->lock); instance_put(inst); return; alloc_failure: /* FIXME: statistics */ goto unlock_and_release; } static int nfulnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; struct nfnl_log_net *log = nfnl_log_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ spin_lock_bh(&log->instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *t2; struct nfulnl_instance *inst; struct hlist_head *head = &log->instance_table[i]; hlist_for_each_entry_safe(inst, t2, head, hlist) { if (n->portid == inst->peer_portid) __instance_destroy(inst); } } spin_unlock_bh(&log->instances_lock); } return NOTIFY_DONE; } static struct notifier_block nfulnl_rtnl_notifier = { .notifier_call = nfulnl_rcv_nl_event, }; static int nfulnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nfula[]) { return -ENOTSUPP; } static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", .type = NF_LOG_TYPE_ULOG, .logfn = nfulnl_log_packet, .me = THIS_MODULE, }; static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { [NFULA_CFG_CMD] = { .len = sizeof(struct nfulnl_msg_config_cmd) }, [NFULA_CFG_MODE] = { .len = sizeof(struct nfulnl_msg_config_mode) }, [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 }, [NFULA_CFG_QTHRESH] = { .type = NLA_U32 }, [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 }, [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, }; static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nfula[]) { struct nfnl_log_net *log = nfnl_log_pernet(info->net); u_int16_t group_num = ntohs(info->nfmsg->res_id); struct nfulnl_msg_config_cmd *cmd = NULL; struct nfulnl_instance *inst; u16 flags = 0; int ret = 0; if (nfula[NFULA_CFG_CMD]) { u_int8_t pf = info->nfmsg->nfgen_family; cmd = nla_data(nfula[NFULA_CFG_CMD]); /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: return nf_log_bind_pf(info->net, pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: nf_log_unbind_pf(info->net, pf); return 0; } } inst = instance_lookup_get(log, group_num); if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; } /* Check if we support these flags in first place, dependencies should * be there too not to break atomicity. 
*/ if (nfula[NFULA_CFG_FLAGS]) { flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS])); if ((flags & NFULNL_CFG_F_CONNTRACK) && !rcu_access_pointer(nfnl_ct_hook)) { #ifdef CONFIG_MODULES nfnl_unlock(NFNL_SUBSYS_ULOG); request_module("ip_conntrack_netlink"); nfnl_lock(NFNL_SUBSYS_ULOG); if (rcu_access_pointer(nfnl_ct_hook)) { ret = -EAGAIN; goto out_put; } #endif ret = -EOPNOTSUPP; goto out_put; } } if (cmd != NULL) { switch (cmd->command) { case NFULNL_CFG_CMD_BIND: if (inst) { ret = -EBUSY; goto out_put; } inst = instance_create(info->net, group_num, NETLINK_CB(skb).portid, sk_user_ns(NETLINK_CB(skb).sk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; } break; case NFULNL_CFG_CMD_UNBIND: if (!inst) { ret = -ENODEV; goto out; } instance_destroy(log, inst); goto out_put; default: ret = -ENOTSUPP; goto out_put; } } else if (!inst) { ret = -ENODEV; goto out; } if (nfula[NFULA_CFG_MODE]) { struct nfulnl_msg_config_mode *params = nla_data(nfula[NFULA_CFG_MODE]); nfulnl_set_mode(inst, params->copy_mode, ntohl(params->copy_range)); } if (nfula[NFULA_CFG_TIMEOUT]) { __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]); nfulnl_set_timeout(inst, ntohl(timeout)); } if (nfula[NFULA_CFG_NLBUFSIZ]) { __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]); nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); } if (nfula[NFULA_CFG_QTHRESH]) { __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]); nfulnl_set_qthresh(inst, ntohl(qthresh)); } if (nfula[NFULA_CFG_FLAGS]) nfulnl_set_flags(inst, flags); out_put: instance_put(inst); out: return ret; } static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, .type = NFNL_CB_MUTEX, .attr_count = NFULA_MAX, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, .type = NFNL_CB_MUTEX, .attr_count = NFULA_CFG_MAX, .policy = nfula_cfg_policy }, }; static const struct nfnetlink_subsystem nfulnl_subsys = { .name = "log", .subsys_id = NFNL_SUBSYS_ULOG, .cb_count = NFULNL_MSG_MAX, .cb = nfulnl_cb, }; #ifdef CONFIG_PROC_FS struct iter_state { struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *get_first(struct net *net, struct iter_state *st) { struct nfnl_log_net *log; if (!st) return NULL; log = nfnl_log_pernet(net); for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { struct hlist_head *head = &log->instance_table[st->bucket]; if (!hlist_empty(head)) return rcu_dereference(hlist_first_rcu(head)); } return NULL; } static struct hlist_node *get_next(struct net *net, struct iter_state *st, struct hlist_node *h) { h = rcu_dereference(hlist_next_rcu(h)); while (!h) { struct nfnl_log_net *log; struct hlist_head *head; if (++st->bucket >= INSTANCE_BUCKETS) return NULL; log = nfnl_log_pernet(net); head = &log->instance_table[st->bucket]; h = rcu_dereference(hlist_first_rcu(head)); } return h; } static struct hlist_node *get_idx(struct net *net, struct iter_state *st, loff_t pos) { struct hlist_node *head; head = get_first(net, st); if (head) while (pos && (head = get_next(net, st, head))) pos--; return pos ? 
NULL : head; } static void *seq_start(struct seq_file *s, loff_t *pos) __acquires(rcu) { rcu_read_lock(); return get_idx(seq_file_net(s), s->private, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; return get_next(seq_file_net(s), s->private, v); } static void seq_stop(struct seq_file *s, void *v) __releases(rcu) { rcu_read_unlock(); } static int seq_show(struct seq_file *s, void *v) { const struct nfulnl_instance *inst = v; seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n", inst->group_num, inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, inst->flushtimeout, refcount_read(&inst->use)); return 0; } static const struct seq_operations nful_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, .show = seq_show, }; #endif /* PROC_FS */ static int __net_init nfnl_log_net_init(struct net *net) { unsigned int i; struct nfnl_log_net *log = nfnl_log_pernet(net); #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; kuid_t root_uid; kgid_t root_gid; #endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&log->instance_table[i]); spin_lock_init(&log->instances_lock); #ifdef CONFIG_PROC_FS proc = proc_create_net("nfnetlink_log", 0440, net->nf.proc_netfilter, &nful_seq_ops, sizeof(struct iter_state)); if (!proc) return -ENOMEM; root_uid = make_kuid(net->user_ns, 0); root_gid = make_kgid(net->user_ns, 0); if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(proc, root_uid, root_gid); #endif return 0; } static void __net_exit nfnl_log_net_exit(struct net *net) { struct nfnl_log_net *log = nfnl_log_pernet(net); unsigned int i; #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); #endif nf_log_unset(net, &nfulnl_logger); for (i = 0; i < INSTANCE_BUCKETS; i++) WARN_ON_ONCE(!hlist_empty(&log->instance_table[i])); } static struct pernet_operations nfnl_log_net_ops = { .init = nfnl_log_net_init, .exit = nfnl_log_net_exit, .id = &nfnl_log_net_id, .size = sizeof(struct nfnl_log_net), }; static int __init nfnetlink_log_init(void) { int status; status = register_pernet_subsys(&nfnl_log_net_ops); if (status < 0) { pr_err("failed to register pernet ops\n"); goto out; } netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { pr_err("failed to create netlink socket\n"); goto cleanup_netlink_notifier; } status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); if (status < 0) { pr_err("failed to register logger\n"); goto cleanup_subsys; } return status; cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); cleanup_netlink_notifier: netlink_unregister_notifier(&nfulnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_log_net_ops); out: return status; } static void __exit nfnetlink_log_fini(void) { nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_log_net_ops); nf_log_unregister(&nfulnl_logger); } MODULE_DESCRIPTION("netfilter userspace logging"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG); MODULE_ALIAS_NF_LOGGER(AF_INET, 1); MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */ MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */ module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); |
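/*
 * Hedged usage sketch (not part of the kernel sources above): one way a
 * userspace collector might read packets that nfnetlink_log emits, assuming
 * the libnetfilter_log API (nflog_open()/nflog_bind_group()/...) and a rule
 * such as "iptables -A INPUT -j NFLOG --nflog-group 1" steering packets to
 * group 1. The group number, copy range and buffer size are illustrative
 * only; the comments map each call to the NFULNL_* config handled by
 * nfulnl_recv_config() above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <libnetfilter_log/libnetfilter_log.h>

static int show_pkt(struct nflog_g_handle *gh, struct nfgenmsg *nfmsg,
		    struct nflog_data *nfa, void *data)
{
	char *prefix = nflog_get_prefix(nfa);		/* NFULA_PREFIX, if present */
	char *payload;
	int len = nflog_get_payload(nfa, &payload);	/* NFULA_PAYLOAD */

	printf("prefix='%s' payload=%d bytes\n", prefix ? prefix : "", len);
	return 0;
}

int main(void)
{
	struct nflog_handle *h;
	struct nflog_g_handle *gh;
	char buf[65536];
	int fd, n;

	h = nflog_open();		/* NETLINK_NETFILTER socket */
	if (!h)
		exit(1);

	gh = nflog_bind_group(h, 1);	/* NFULNL_CFG_CMD_BIND on group 1 */
	if (!gh)
		exit(1);

	/* NFULA_CFG_MODE: NFULNL_COPY_PACKET with a 128-byte copy range */
	nflog_set_mode(gh, NFULNL_COPY_PACKET, 128);

	nflog_callback_register(gh, &show_pkt, NULL);

	fd = nflog_fd(h);
	while ((n = recv(fd, buf, sizeof(buf), 0)) > 0)
		nflog_handle_packet(h, buf, n);	/* dispatches to show_pkt() */

	nflog_unbind_group(gh);			/* NFULNL_CFG_CMD_UNBIND */
	nflog_close(h);
	return 0;
}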
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  HID driver for some monterey "special" devices
 *
 *  Copyright (c) 1999 Andreas Gal
 *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
 *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
 *  Copyright (c) 2006-2007 Jiri Kosina
 *  Copyright (c) 2008 Jiri Slaby
 */

/*
 */

#include <linux/device.h>
#include <linux/hid.h>
#include <linux/module.h>

#include "hid-ids.h"

static const __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
		unsigned int *rsize)
{
	if (*rsize >= 31 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
		hid_info(hdev, "fixing up button/consumer in HID report descriptor\n");
		rdesc[30] = 0x0c;
	}
	return rdesc;
}

#define mr_map_key_clear(c)	hid_map_usage_clear(hi, usage, bit, max, \
					EV_KEY, (c))
static int mr_input_mapping(struct hid_device *hdev, struct hid_input *hi,
		struct hid_field *field, struct hid_usage *usage,
		unsigned long **bit, int *max)
{
	if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER)
		return 0;

	switch (usage->hid & HID_USAGE) {
	case 0x156: mr_map_key_clear(KEY_WORDPROCESSOR);	break;
	case 0x157: mr_map_key_clear(KEY_SPREADSHEET);		break;
	case 0x158: mr_map_key_clear(KEY_PRESENTATION);		break;
	case 0x15c: mr_map_key_clear(KEY_STOP);			break;
	default:
		return 0;
	}
	return 1;
}

static const struct hid_device_id mr_devices[] = {
	{ HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
	{ }
};
MODULE_DEVICE_TABLE(hid, mr_devices);

static struct hid_driver mr_driver = {
	.name = "monterey",
	.id_table = mr_devices,
	.report_fixup = mr_report_fixup,
	.input_mapping = mr_input_mapping,
};
module_hid_driver(mr_driver);

MODULE_DESCRIPTION("HID driver for some monterey \"special\" devices");
MODULE_LICENSE("GPL");
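/*
 * Hedged illustration (not part of hid-monterey.c): a standalone user-space
 * sketch of the byte patch that mr_report_fixup() applies. The 31-byte
 * descriptor stub below is hypothetical; only bytes 29/30 matter - a HID
 * Usage Page item (prefix 0x05) selecting the Button page (0x09) is
 * rewritten to select the Consumer page (0x0c), so the 0x156-0x15c usages
 * can then be mapped by mr_input_mapping() as consumer keys.
 */
#include <stdio.h>

int main(void)
{
	unsigned char rdesc[31] = { 0 };	/* hypothetical descriptor stub */

	rdesc[29] = 0x05;	/* HID short item: Usage Page */
	rdesc[30] = 0x09;	/* Button page (wrong for these keys) */

	/* Same condition and patch as mr_report_fixup() */
	if (sizeof(rdesc) >= 31 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
		printf("fixing up button/consumer: 0x%02x -> 0x0c\n", rdesc[30]);
		rdesc[30] = 0x0c;	/* Consumer page */
	}

	printf("rdesc[30] is now 0x%02x\n", rdesc[30]);
	return 0;
}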
// SPDX-License-Identifier: GPL-2.0-or-later /* Generic associative array implementation. * * See Documentation/core-api/assoc_array.rst for information. * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ //#define DEBUG #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/assoc_array_priv.h> /* * Iterate over an associative array. The caller must hold the RCU read lock * or better. */ static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, const struct assoc_array_ptr *stop, int (*iterator)(const void *leaf, void *iterator_data), void *iterator_data) { const struct assoc_array_shortcut *shortcut; const struct assoc_array_node *node; const struct assoc_array_ptr *cursor, *ptr, *parent; unsigned long has_meta; int slot, ret; cursor = root; begin_node: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ } node = assoc_array_ptr_to_node(cursor); slot = 0; /* We perform two passes of each node. * * The first pass does all the leaves in this node. This means we * don't miss any leaves if the node is split up by insertion whilst * we're iterating over the branches rooted here (we may, however, see * some leaves twice). */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer, * which is supplied by the above READ_ONCE(). */ /* Invoke the callback */ ret = iterator(assoc_array_ptr_to_leaf(ptr), iterator_data); if (ret) return ret; } } /* The second pass attends to all the metadata pointers. If we follow * one of these we may find that we don't come back here, but rather go * back to a replacement node with the leaves in a different layout. * * We are guaranteed to make progress, however, as the slot number for * a particular portion of the key space cannot change - and we * continue at the back pointer + 1. */ if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) goto finished_node; slot = 0; continue_node: node = assoc_array_ptr_to_node(cursor); for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; } } finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ parent = READ_ONCE(node->back_pointer); /* Address dependency. */ slot = node->parent_slot; if (parent == stop) return 0; if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); cursor = parent; parent = READ_ONCE(shortcut->back_pointer); /* Address dependency.
*/ slot = shortcut->parent_slot; if (parent == stop) return 0; } /* Ascend to next slot in parent node */ cursor = parent; slot++; goto continue_node; } /** * assoc_array_iterate - Pass all objects in the array to a callback * @array: The array to iterate over. * @iterator: The callback function. * @iterator_data: Private data for the callback function. * * Iterate over all the objects in an associative array. Each one will be * presented to the iterator function. * * If the array is being modified concurrently with the iteration then it is * possible that some objects in the array will be passed to the iterator * callback more than once - though every object should be passed at least * once. If this is undesirable then the caller must lock against modification * for the duration of this function. * * The function will return 0 if no objects were in the array or else it will * return the result of the last iterator function called. Iteration stops * immediately if any call to the iteration function results in a non-zero * return. * * The caller should hold the RCU read lock or better if concurrent * modification is possible. */ int assoc_array_iterate(const struct assoc_array *array, int (*iterator)(const void *object, void *iterator_data), void *iterator_data) { struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ if (!root) return 0; return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); } enum assoc_array_walk_status { assoc_array_walk_tree_empty, assoc_array_walk_found_terminal_node, assoc_array_walk_found_wrong_shortcut, }; struct assoc_array_walk_result { struct { struct assoc_array_node *node; /* Node in which leaf might be found */ int level; int slot; } terminal_node; struct { struct assoc_array_shortcut *shortcut; int level; int sc_level; unsigned long sc_segments; unsigned long dissimilarity; } wrong_shortcut; }; /* * Navigate through the internal tree looking for the closest node to the key. */ static enum assoc_array_walk_status assoc_array_walk(const struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key, struct assoc_array_walk_result *result) { struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *cursor, *ptr; unsigned long sc_segments, dissimilarity; unsigned long segments; int level, sc_level, next_sc_level; int slot; pr_devel("-->%s()\n", __func__); cursor = READ_ONCE(array->root); /* Address dependency. */ if (!cursor) return assoc_array_walk_tree_empty; level = 0; /* Use segments from the key for the new leaf to navigate through the * internal tree, skipping through nodes and shortcuts that are on * route to the destination. Eventually we'll come to a slot that is * either empty or contains a leaf at which point we've found a node in * which the leaf we're looking for might be found or into which it * should be inserted. */ jumped: segments = ops->get_key_chunk(index_key, level); pr_devel("segments[%d]: %lx\n", level, segments); if (assoc_array_ptr_is_shortcut(cursor)) goto follow_shortcut; consider_node: node = assoc_array_ptr_to_node(cursor); slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); if (!assoc_array_ptr_is_meta(ptr)) { /* The node doesn't have a node/shortcut pointer in the slot * corresponding to the index key that we have to follow. 
*/ result->terminal_node.node = node; result->terminal_node.level = level; result->terminal_node.slot = slot; pr_devel("<--%s() = terminal_node\n", __func__); return assoc_array_walk_found_terminal_node; } if (assoc_array_ptr_is_node(ptr)) { /* There is a pointer to a node in the slot corresponding to * this index key segment, so we need to follow it. */ cursor = ptr; level += ASSOC_ARRAY_LEVEL_STEP; if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) goto consider_node; goto jumped; } /* There is a shortcut in the slot corresponding to the index key * segment. We follow the shortcut if its partial index key matches * this leaf's. Otherwise we need to split the shortcut. */ cursor = ptr; follow_shortcut: shortcut = assoc_array_ptr_to_shortcut(cursor); pr_devel("shortcut to %d\n", shortcut->skip_to_level); sc_level = level + ASSOC_ARRAY_LEVEL_STEP; BUG_ON(sc_level > shortcut->skip_to_level); do { /* Check the leaf against the shortcut's index key a word at a * time, trimming the final word (the shortcut stores the index * key completely from the root to the shortcut's target). */ if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) segments = ops->get_key_chunk(index_key, sc_level); sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; dissimilarity = segments ^ sc_segments; if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { /* Trim segments that are beyond the shortcut */ int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; dissimilarity &= ~(ULONG_MAX << shift); next_sc_level = shortcut->skip_to_level; } else { next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); } if (dissimilarity != 0) { /* This shortcut points elsewhere */ result->wrong_shortcut.shortcut = shortcut; result->wrong_shortcut.level = level; result->wrong_shortcut.sc_level = sc_level; result->wrong_shortcut.sc_segments = sc_segments; result->wrong_shortcut.dissimilarity = dissimilarity; return assoc_array_walk_found_wrong_shortcut; } sc_level = next_sc_level; } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; } else { level = sc_level; goto consider_node; } } /** * assoc_array_find - Find an object by index key * @array: The associative array to search. * @ops: The operations to use. * @index_key: The key to the object. * * Find an object in an associative array by walking through the internal tree * to the node that should contain the object and then searching the leaves * there. NULL is returned if the requested object was not found in the array. * * The caller must hold the RCU read lock or better. */ void *assoc_array_find(const struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key) { struct assoc_array_walk_result result; const struct assoc_array_node *node; const struct assoc_array_ptr *ptr; const void *leaf; int slot; if (assoc_array_walk(array, ops, index_key, &result) != assoc_array_walk_found_terminal_node) return NULL; node = result.terminal_node.node; /* If the target key is available to us, it's has to be pointed to by * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); /* Address dependency. 
*/ if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are * actually going to dereference it. */ leaf = assoc_array_ptr_to_leaf(ptr); if (ops->compare_object(leaf, index_key)) return (void *)leaf; } } return NULL; } /* * Destructively iterate over an associative array. The caller must prevent * other simultaneous accesses. */ static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, const struct assoc_array_ops *ops) { struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *cursor, *parent = NULL; int slot = -1; pr_devel("-->%s()\n", __func__); cursor = root; if (!cursor) { pr_devel("empty\n"); return; } move_to_meta: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ pr_devel("[%d] shortcut\n", slot); BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); shortcut = assoc_array_ptr_to_shortcut(cursor); BUG_ON(shortcut->back_pointer != parent); BUG_ON(slot != -1 && shortcut->parent_slot != slot); parent = cursor; cursor = shortcut->next_node; slot = -1; BUG_ON(!assoc_array_ptr_is_node(cursor)); } pr_devel("[%d] node\n", slot); node = assoc_array_ptr_to_node(cursor); BUG_ON(node->back_pointer != parent); BUG_ON(slot != -1 && node->parent_slot != slot); slot = 0; continue_node: pr_devel("Node %p [back=%p]\n", node, node->back_pointer); for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_ptr *ptr = node->slots[slot]; if (!ptr) continue; if (assoc_array_ptr_is_meta(ptr)) { parent = cursor; cursor = ptr; goto move_to_meta; } if (ops) { pr_devel("[%d] free leaf\n", slot); ops->free_object(assoc_array_ptr_to_leaf(ptr)); } } parent = node->back_pointer; slot = node->parent_slot; pr_devel("free node\n"); kfree(node); if (!parent) return; /* Done */ /* Move back up to the parent (may need to free a shortcut on * the way up) */ if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); BUG_ON(shortcut->next_node != cursor); cursor = parent; parent = shortcut->back_pointer; slot = shortcut->parent_slot; pr_devel("free shortcut\n"); kfree(shortcut); if (!parent) return; BUG_ON(!assoc_array_ptr_is_node(parent)); } /* Ascend to next slot in parent node */ pr_devel("ascend to %p[%d]\n", parent, slot); cursor = parent; node = assoc_array_ptr_to_node(cursor); slot++; goto continue_node; } /** * assoc_array_destroy - Destroy an associative array * @array: The array to destroy. * @ops: The operations to use. * * Discard all metadata and free all objects in an associative array. The * array will be empty and ready to use again upon completion. This function * cannot fail. * * The caller must prevent all other accesses whilst this takes place as no * attempt is made to adjust pointers gracefully to permit RCU readlock-holding * accesses to continue. On the other hand, no memory allocation is required. */ void assoc_array_destroy(struct assoc_array *array, const struct assoc_array_ops *ops) { assoc_array_destroy_subtree(array->root, ops); array->root = NULL; } /* * Handle insertion into an empty tree. 
*/ static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) { struct assoc_array_node *new_n0; pr_devel("-->%s()\n", __func__); new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) return false; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); edit->leaf_p = &new_n0->slots[0]; edit->adjust_count_on = new_n0; edit->set[0].ptr = &edit->array->root; edit->set[0].to = assoc_array_node_to_ptr(new_n0); pr_devel("<--%s() = ok [no root]\n", __func__); return true; } /* * Handle insertion into a terminal node. */ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, const struct assoc_array_ops *ops, const void *index_key, struct assoc_array_walk_result *result) { struct assoc_array_shortcut *shortcut, *new_s0; struct assoc_array_node *node, *new_n0, *new_n1, *side; struct assoc_array_ptr *ptr; unsigned long dissimilarity, base_seg, blank; size_t keylen; bool have_meta; int level, diff; int slot, next_slot, free_slot, i, j; node = result->terminal_node.node; level = result->terminal_node.level; edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; pr_devel("-->%s()\n", __func__); /* We arrived at a node which doesn't have an onward node or shortcut * pointer that we have to follow. This means that (a) the leaf we * want must go here (either by insertion or replacement) or (b) we * need to split this node and insert in one of the fragments. */ free_slot = -1; /* Firstly, we have to check the leaves in this node to see if there's * a matching one we should replace in place. */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; if (!ptr) { free_slot = i; continue; } if (assoc_array_ptr_is_leaf(ptr) && ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { pr_devel("replace in slot %d\n", i); edit->leaf_p = &node->slots[i]; edit->dead_leaf = node->slots[i]; pr_devel("<--%s() = ok [replace]\n", __func__); return true; } } /* If there is a free slot in this node then we can just insert the * leaf here. */ if (free_slot >= 0) { pr_devel("insert in free slot %d\n", free_slot); edit->leaf_p = &node->slots[free_slot]; edit->adjust_count_on = node; pr_devel("<--%s() = ok [insert]\n", __func__); return true; } /* The node has no spare slots - so we're either going to have to split * it or insert another node before it. * * Whatever, we're going to need at least two new nodes - so allocate * those now. We may also need a new shortcut, but we deal with that * when we need it. */ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) return false; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n1) return false; edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); /* We need to find out how similar the leaves are. 
*/ pr_devel("no spare slots\n"); have_meta = false; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; if (assoc_array_ptr_is_meta(ptr)) { edit->segment_cache[i] = 0xff; have_meta = true; continue; } base_seg = ops->get_object_key_chunk( assoc_array_ptr_to_leaf(ptr), level); base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; } if (have_meta) { pr_devel("have meta\n"); goto split_node; } /* The node contains only leaves */ dissimilarity = 0; base_seg = edit->segment_cache[0]; for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) dissimilarity |= edit->segment_cache[i] ^ base_seg; pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { /* The old leaves all cluster in the same slot. We will need * to insert a shortcut if the new node wants to cluster with them. */ if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) goto all_leaves_cluster_together; /* Otherwise all the old leaves cluster in the same slot, but * the new leaf wants to go into a different slot - so we * create a new node (n0) to hold the new leaf and a pointer to * a new node (n1) holding all the old leaves. * * This can be done by falling through to the node splitting * path. */ pr_devel("present leaves cluster but not new leaf\n"); } split_node: pr_devel("split node\n"); /* We need to split the current node. The node must contain anything * from a single leaf (in the one leaf case, this leaf will cluster * with the new leaf) and the rest meta-pointers, to all leaves, some * of which may cluster. * * It won't contain the case in which all the current leaves plus the * new leaves want to cluster in the same slot. * * We need to expel at least two leaves out of a set consisting of the * leaves in the node and the new leaf. The current meta pointers can * just be copied as they shouldn't cluster with any of the leaves. * * We need a new node (n0) to replace the current one and a new node to * take the expelled nodes (n1). */ edit->set[0].to = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = node->back_pointer; new_n0->parent_slot = node->parent_slot; new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); new_n1->parent_slot = -1; /* Need to calculate this */ do_split_node: pr_devel("do_split_node\n"); new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; new_n1->nr_leaves_on_branch = 0; /* Begin by finding two matching leaves. There have to be at least two * that match - even if there are meta pointers - because any leaf that * would match a slot with a meta pointer in it must be somewhere * behind that meta pointer and cannot be here. Further, given N * remaining leaf slots, we now have N+1 leaves to go in them. 
*/ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { slot = edit->segment_cache[i]; if (slot != 0xff) for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) if (edit->segment_cache[j] == slot) goto found_slot_for_multiple_occupancy; } found_slot_for_multiple_occupancy: pr_devel("same slot: %x %x [%02x]\n", i, j, slot); BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); new_n1->parent_slot = slot; /* Metadata pointers cannot change slot */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) if (assoc_array_ptr_is_meta(node->slots[i])) new_n0->slots[i] = node->slots[i]; else new_n0->slots[i] = NULL; BUG_ON(new_n0->slots[slot] != NULL); new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); /* Filter the leaf pointers between the new nodes */ free_slot = -1; next_slot = 0; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { if (assoc_array_ptr_is_meta(node->slots[i])) continue; if (edit->segment_cache[i] == slot) { new_n1->slots[next_slot++] = node->slots[i]; new_n1->nr_leaves_on_branch++; } else { do { free_slot++; } while (new_n0->slots[free_slot] != NULL); new_n0->slots[free_slot] = node->slots[i]; } } pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { do { free_slot++; } while (new_n0->slots[free_slot] != NULL); edit->leaf_p = &new_n0->slots[free_slot]; edit->adjust_count_on = new_n0; } else { edit->leaf_p = &new_n1->slots[next_slot++]; edit->adjust_count_on = new_n1; } BUG_ON(next_slot <= 1); edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { if (edit->segment_cache[i] == 0xff) { ptr = node->slots[i]; BUG_ON(assoc_array_ptr_is_leaf(ptr)); if (assoc_array_ptr_is_node(ptr)) { side = assoc_array_ptr_to_node(ptr); edit->set_backpointers[i] = &side->back_pointer; } else { shortcut = assoc_array_ptr_to_shortcut(ptr); edit->set_backpointers[i] = &shortcut->back_pointer; } } } ptr = node->back_pointer; if (!ptr) edit->set[0].ptr = &edit->array->root; else if (assoc_array_ptr_is_node(ptr)) edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; else edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; edit->excised_meta[0] = assoc_array_node_to_ptr(node); pr_devel("<--%s() = ok [split node]\n", __func__); return true; all_leaves_cluster_together: /* All the leaves, new and old, want to cluster together in this node * in the same slot, so we have to replace this node with a shortcut to * skip over the identical parts of the key and then place a pair of * nodes, one inside the other, at the end of the shortcut and * distribute the keys between them. * * Firstly we need to work out where the leaves start diverging as a * bit position into their keys so that we know how big the shortcut * needs to be. * * We only need to make a single pass of N of the N+1 leaves because if * any keys differ between themselves at bit X then at least one of * them must also differ with the base key at bit X or before. 
*/ pr_devel("all leaves cluster together\n"); diff = INT_MAX; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), index_key); if (x < diff) { BUG_ON(x < 0); diff = x; } } BUG_ON(diff == INT_MAX); BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL); if (!new_s0) return false; edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); new_s0->back_pointer = node->back_pointer; new_s0->parent_slot = node->parent_slot; new_s0->next_node = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); new_n0->parent_slot = 0; new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); new_n1->parent_slot = -1; /* Need to calculate this */ new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; pr_devel("skip_to_level = %d [diff %d]\n", level, diff); BUG_ON(level <= 0); for (i = 0; i < keylen; i++) new_s0->index_key[i] = ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); new_s0->index_key[keylen - 1] &= ~blank; } /* This now reduces to a node splitting exercise for which we'll need * to regenerate the disparity table. */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), level); base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; } base_seg = ops->get_key_chunk(index_key, level); base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; goto do_split_node; } /* * Handle insertion into the middle of a shortcut. */ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, const struct assoc_array_ops *ops, struct assoc_array_walk_result *result) { struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; struct assoc_array_node *node, *new_n0, *side; unsigned long sc_segments, dissimilarity, blank; size_t keylen; int level, sc_level, diff; int sc_slot; shortcut = result->wrong_shortcut.shortcut; level = result->wrong_shortcut.level; sc_level = result->wrong_shortcut.sc_level; sc_segments = result->wrong_shortcut.sc_segments; dissimilarity = result->wrong_shortcut.dissimilarity; pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", __func__, level, dissimilarity, sc_level); /* We need to split a shortcut and insert a node between the two * pieces. Zero-length pieces will be dispensed with entirely. * * First of all, we need to find out in which level the first * difference was. 
*/ diff = __ffs(dissimilarity); diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; pr_devel("diff=%d\n", diff); if (!shortcut->back_pointer) { edit->set[0].ptr = &edit->array->root; } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { node = assoc_array_ptr_to_node(shortcut->back_pointer); edit->set[0].ptr = &node->slots[shortcut->parent_slot]; } else { BUG(); } edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); /* Create a new node now since we're going to need it anyway */ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) return false; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); edit->adjust_count_on = new_n0; /* Insert a new shortcut before the new node if this segment isn't of * zero length - otherwise we just connect the new node directly to the * parent. */ level += ASSOC_ARRAY_LEVEL_STEP; if (diff > level) { pr_devel("pre-shortcut %d...%d\n", level, diff); keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL); if (!new_s0) return false; edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); new_s0->back_pointer = shortcut->back_pointer; new_s0->parent_slot = shortcut->parent_slot; new_s0->next_node = assoc_array_node_to_ptr(new_n0); new_s0->skip_to_level = diff; new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); new_n0->parent_slot = 0; memcpy(new_s0->index_key, shortcut->index_key, flex_array_size(new_s0, index_key, keylen)); blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); new_s0->index_key[keylen - 1] &= ~blank; } else { pr_devel("no pre-shortcut\n"); edit->set[0].to = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = shortcut->back_pointer; new_n0->parent_slot = shortcut->parent_slot; } side = assoc_array_ptr_to_node(shortcut->next_node); new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; /* We need to know which slot in the new node is going to take a * metadata pointer. */ sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); sc_slot &= ASSOC_ARRAY_FAN_MASK; pr_devel("new slot %lx >> %d -> %d\n", sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); /* Determine whether we need to follow the new node with a replacement * for the current shortcut. We could in theory reuse the current * shortcut if its parent slot number doesn't change - but that's a * 1-in-16 chance so not worth expending the code upon. 
*/ level = diff + ASSOC_ARRAY_LEVEL_STEP; if (level < shortcut->skip_to_level) { pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s1 = kzalloc(struct_size(new_s1, index_key, keylen), GFP_KERNEL); if (!new_s1) return false; edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); new_s1->parent_slot = sc_slot; new_s1->next_node = shortcut->next_node; new_s1->skip_to_level = shortcut->skip_to_level; new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); memcpy(new_s1->index_key, shortcut->index_key, flex_array_size(new_s1, index_key, keylen)); edit->set[1].ptr = &side->back_pointer; edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); } else { pr_devel("no post-shortcut\n"); /* We don't have to replace the pointed-to node as long as we * use memory barriers to make sure the parent slot number is * changed before the back pointer (the parent slot number is * irrelevant to the old parent shortcut). */ new_n0->slots[sc_slot] = shortcut->next_node; edit->set_parent_slot[0].p = &side->parent_slot; edit->set_parent_slot[0].to = sc_slot; edit->set[1].ptr = &side->back_pointer; edit->set[1].to = assoc_array_node_to_ptr(new_n0); } /* Install the new leaf in a spare slot in the new node. */ if (sc_slot == 0) edit->leaf_p = &new_n0->slots[1]; else edit->leaf_p = &new_n0->slots[0]; pr_devel("<--%s() = ok [split shortcut]\n", __func__); return true; } /** * assoc_array_insert - Script insertion of an object into an associative array * @array: The array to insert into. * @ops: The operations to use. * @index_key: The key to insert at. * @object: The object to insert. * * Precalculate and preallocate a script for the insertion or replacement of an * object in an associative array. This results in an edit script that can * either be applied or cancelled. * * The function returns a pointer to an edit script or -ENOMEM. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key, void *object) { struct assoc_array_walk_result result; struct assoc_array_edit *edit; pr_devel("-->%s()\n", __func__); /* The leaf pointer we're given must not have the bottom bit set as we * use those for type-marking the pointer. NULL pointers are also not * allowed as they indicate an empty slot but we have to allow them * here as they can be updated later. */ BUG_ON(assoc_array_ptr_is_meta(object)); edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return ERR_PTR(-ENOMEM); edit->array = array; edit->ops = ops; edit->leaf = assoc_array_leaf_to_ptr(object); edit->adjust_count_by = 1; switch (assoc_array_walk(array, ops, index_key, &result)) { case assoc_array_walk_tree_empty: /* Allocate a root node if there isn't one yet */ if (!assoc_array_insert_in_empty_tree(edit)) goto enomem; return edit; case assoc_array_walk_found_terminal_node: /* We found a node that doesn't have a node/shortcut pointer in * the slot corresponding to the index key that we have to * follow. 
*/ if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, &result)) goto enomem; return edit; case assoc_array_walk_found_wrong_shortcut: /* We found a shortcut that didn't match our key in a slot we * needed to follow. */ if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) goto enomem; return edit; } enomem: /* Clean up after an out of memory error */ pr_devel("enomem\n"); assoc_array_cancel_edit(edit); return ERR_PTR(-ENOMEM); } /** * assoc_array_insert_set_object - Set the new object pointer in an edit script * @edit: The edit script to modify. * @object: The object pointer to set. * * Change the object to be inserted in an edit script. The object pointed to * by the old object is not freed. This must be done prior to applying the * script. */ void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) { BUG_ON(!object); edit->leaf = assoc_array_leaf_to_ptr(object); } struct assoc_array_delete_collapse_context { struct assoc_array_node *node; const void *skip_leaf; int slot; }; /* * Subtree collapse to node iterator. */ static int assoc_array_delete_collapse_iterator(const void *leaf, void *iterator_data) { struct assoc_array_delete_collapse_context *collapse = iterator_data; if (leaf == collapse->skip_leaf) return 0; BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); return 0; } /** * assoc_array_delete - Script deletion of an object from an associative array * @array: The array to search. * @ops: The operations to use. * @index_key: The key to the object. * * Precalculate and preallocate a script for the deletion of an object from an * associative array. This results in an edit script that can either be * applied or cancelled. * * The function returns a pointer to an edit script if the object was found, * NULL if the object was not found or -ENOMEM. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key) { struct assoc_array_delete_collapse_context collapse; struct assoc_array_walk_result result; struct assoc_array_node *node, *new_n0; struct assoc_array_edit *edit; struct assoc_array_ptr *ptr; bool has_meta; int slot, i; pr_devel("-->%s()\n", __func__); edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return ERR_PTR(-ENOMEM); edit->array = array; edit->ops = ops; edit->adjust_count_by = -1; switch (assoc_array_walk(array, ops, index_key, &result)) { case assoc_array_walk_found_terminal_node: /* We found a node that should contain the leaf we've been * asked to remove - *if* it's in the tree. */ pr_devel("terminal_node\n"); node = result.terminal_node.node; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = node->slots[slot]; if (ptr && assoc_array_ptr_is_leaf(ptr) && ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) goto found_leaf; } fallthrough; case assoc_array_walk_tree_empty: case assoc_array_walk_found_wrong_shortcut: default: assoc_array_cancel_edit(edit); pr_devel("not found\n"); return NULL; } found_leaf: BUG_ON(array->nr_leaves_on_tree <= 0); /* In the simplest form of deletion we just clear the slot and release * the leaf after a suitable interval. 
*/ edit->dead_leaf = node->slots[slot]; edit->set[0].ptr = &node->slots[slot]; edit->set[0].to = NULL; edit->adjust_count_on = node; /* If that concludes erasure of the last leaf, then delete the entire * internal array. */ if (array->nr_leaves_on_tree == 1) { edit->set[1].ptr = &array->root; edit->set[1].to = NULL; edit->adjust_count_on = NULL; edit->excised_subtree = array->root; pr_devel("all gone\n"); return edit; } /* However, we'd also like to clear up some metadata blocks if we * possibly can. * * We go for a simple algorithm of: if this node has FAN_OUT or fewer * leaves in it, then attempt to collapse it - and attempt to * recursively collapse up the tree. * * We could also try and collapse in partially filled subtrees to take * up space in this node. */ if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { struct assoc_array_node *parent, *grandparent; struct assoc_array_ptr *ptr; /* First of all, we need to know if this node has metadata so * that we don't try collapsing if all the leaves are already * here. */ has_meta = false; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; if (assoc_array_ptr_is_meta(ptr)) { has_meta = true; break; } } pr_devel("leaves: %ld [m=%d]\n", node->nr_leaves_on_branch - 1, has_meta); /* Look further up the tree to see if we can collapse this node * into a more proximal node too. */ parent = node; collapse_up: pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); ptr = parent->back_pointer; if (!ptr) goto do_collapse; if (assoc_array_ptr_is_shortcut(ptr)) { struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); ptr = s->back_pointer; if (!ptr) goto do_collapse; } grandparent = assoc_array_ptr_to_node(ptr); if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { parent = grandparent; goto collapse_up; } do_collapse: /* There's no point collapsing if the original node has no meta * pointers to discard and if we didn't merge into one of that * node's ancestry. 
*/ if (has_meta || parent != node) { node = parent; /* Create a new node to collapse into */ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) goto enomem; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = node->back_pointer; new_n0->parent_slot = node->parent_slot; new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; edit->adjust_count_on = new_n0; collapse.node = new_n0; collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); collapse.slot = 0; assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), node->back_pointer, assoc_array_delete_collapse_iterator, &collapse); pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); if (!node->back_pointer) { edit->set[1].ptr = &array->root; } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { BUG(); } else if (assoc_array_ptr_is_node(node->back_pointer)) { struct assoc_array_node *p = assoc_array_ptr_to_node(node->back_pointer); edit->set[1].ptr = &p->slots[node->parent_slot]; } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(node->back_pointer); edit->set[1].ptr = &s->next_node; } edit->set[1].to = assoc_array_node_to_ptr(new_n0); edit->excised_subtree = assoc_array_node_to_ptr(node); } } return edit; enomem: /* Clean up after an out of memory error */ pr_devel("enomem\n"); assoc_array_cancel_edit(edit); return ERR_PTR(-ENOMEM); } /** * assoc_array_clear - Script deletion of all objects from an associative array * @array: The array to clear. * @ops: The operations to use. * * Precalculate and preallocate a script for the deletion of all the objects * from an associative array. This results in an edit script that can either * be applied or cancelled. * * The function returns a pointer to an edit script if there are objects to be * deleted, NULL if there are no objects in the array or -ENOMEM. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, const struct assoc_array_ops *ops) { struct assoc_array_edit *edit; pr_devel("-->%s()\n", __func__); if (!array->root) return NULL; edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return ERR_PTR(-ENOMEM); edit->array = array; edit->ops = ops; edit->set[1].ptr = &array->root; edit->set[1].to = NULL; edit->excised_subtree = array->root; edit->ops_for_excised_subtree = ops; pr_devel("all gone\n"); return edit; } /* * Handle the deferred destruction after an applied edit. 
*/ static void assoc_array_rcu_cleanup(struct rcu_head *head) { struct assoc_array_edit *edit = container_of(head, struct assoc_array_edit, rcu); int i; pr_devel("-->%s()\n", __func__); if (edit->dead_leaf) edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) if (edit->excised_meta[i]) kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); if (edit->excised_subtree) { BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); if (assoc_array_ptr_is_node(edit->excised_subtree)) { struct assoc_array_node *n = assoc_array_ptr_to_node(edit->excised_subtree); n->back_pointer = NULL; } else { struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(edit->excised_subtree); s->back_pointer = NULL; } assoc_array_destroy_subtree(edit->excised_subtree, edit->ops_for_excised_subtree); } kfree(edit); } /** * assoc_array_apply_edit - Apply an edit script to an associative array * @edit: The script to apply. * * Apply an edit script to an associative array to effect an insertion, * deletion or clearance. As the edit script includes preallocated memory, * this is guaranteed not to fail. * * The edit script, dead objects and dead metadata will be scheduled for * destruction after an RCU grace period to permit those doing read-only * accesses on the array to continue to do so under the RCU read lock whilst * the edit is taking place. */ void assoc_array_apply_edit(struct assoc_array_edit *edit) { struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *ptr; int i; pr_devel("-->%s()\n", __func__); smp_wmb(); if (edit->leaf_p) *edit->leaf_p = edit->leaf; smp_wmb(); for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) if (edit->set_parent_slot[i].p) *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; smp_wmb(); for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) if (edit->set_backpointers[i]) *edit->set_backpointers[i] = edit->set_backpointers_to; smp_wmb(); for (i = 0; i < ARRAY_SIZE(edit->set); i++) if (edit->set[i].ptr) *edit->set[i].ptr = edit->set[i].to; if (edit->array->root == NULL) { edit->array->nr_leaves_on_tree = 0; } else if (edit->adjust_count_on) { node = edit->adjust_count_on; for (;;) { node->nr_leaves_on_branch += edit->adjust_count_by; ptr = node->back_pointer; if (!ptr) break; if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); ptr = shortcut->back_pointer; if (!ptr) break; } BUG_ON(!assoc_array_ptr_is_node(ptr)); node = assoc_array_ptr_to_node(ptr); } edit->array->nr_leaves_on_tree += edit->adjust_count_by; } call_rcu(&edit->rcu, assoc_array_rcu_cleanup); } /** * assoc_array_cancel_edit - Discard an edit script. * @edit: The script to discard. * * Free an edit script and all the preallocated data it holds without making * any changes to the associative array it was intended for. * * NOTE! In the case of an insertion script, this does _not_ release the leaf * that was to be inserted. That is left to the caller. */ void assoc_array_cancel_edit(struct assoc_array_edit *edit) { struct assoc_array_ptr *ptr; int i; pr_devel("-->%s()\n", __func__); /* Clean up after an out of memory error */ for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { ptr = edit->new_meta[i]; if (ptr) { if (assoc_array_ptr_is_node(ptr)) kfree(assoc_array_ptr_to_node(ptr)); else kfree(assoc_array_ptr_to_shortcut(ptr)); } } kfree(edit); } /** * assoc_array_gc - Garbage collect an associative array. * @array: The array to clean. * @ops: The operations to use. 
* @iterator: A callback function to pass judgement on each object. * @iterator_data: Private data for the callback function. * * Collect garbage from an associative array and pack down the internal tree to * save memory. * * The iterator function is asked to pass judgement upon each object in the * array. If it returns false, the object is discarded and if it returns true, * the object is kept. If it returns true, it must increment the object's * usage count (or whatever it needs to do to retain it) before returning. * * This function returns 0 if successful or -ENOMEM if out of memory. In the * latter case, the array is not changed. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ int assoc_array_gc(struct assoc_array *array, const struct assoc_array_ops *ops, bool (*iterator)(void *object, void *iterator_data), void *iterator_data) { struct assoc_array_shortcut *shortcut, *new_s; struct assoc_array_node *node, *new_n; struct assoc_array_edit *edit; struct assoc_array_ptr *cursor, *ptr; struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; unsigned long nr_leaves_on_tree; bool retained; int keylen, slot, nr_free, next_slot, i; pr_devel("-->%s()\n", __func__); if (!array->root) return 0; edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return -ENOMEM; edit->array = array; edit->ops = ops; edit->ops_for_excised_subtree = ops; edit->set[0].ptr = &array->root; edit->excised_subtree = array->root; new_root = new_parent = NULL; new_ptr_pp = &new_root; cursor = array->root; descend: /* If this point is a shortcut, then we need to duplicate it and * advance the target cursor. */ if (assoc_array_ptr_is_shortcut(cursor)) { shortcut = assoc_array_ptr_to_shortcut(cursor); keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s = kmalloc(struct_size(new_s, index_key, keylen), GFP_KERNEL); if (!new_s) goto enomem; pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); memcpy(new_s, shortcut, struct_size(new_s, index_key, keylen)); new_s->back_pointer = new_parent; new_s->parent_slot = shortcut->parent_slot; *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); new_ptr_pp = &new_s->next_node; cursor = shortcut->next_node; } /* Duplicate the node at this position */ node = assoc_array_ptr_to_node(cursor); new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n) goto enomem; pr_devel("dup node %p -> %p\n", node, new_n); new_n->back_pointer = new_parent; new_n->parent_slot = node->parent_slot; *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); new_ptr_pp = NULL; slot = 0; continue_node: /* Filter across any leaves and gc any subtrees */ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = node->slots[slot]; if (!ptr) continue; if (assoc_array_ptr_is_leaf(ptr)) { if (iterator(assoc_array_ptr_to_leaf(ptr), iterator_data)) /* The iterator will have done any reference * counting on the object for us. */ new_n->slots[slot] = ptr; continue; } new_ptr_pp = &new_n->slots[slot]; cursor = ptr; goto descend; } retry_compress: pr_devel("-- compress node %p --\n", new_n); /* Count up the number of empty slots in this node and work out the * subtree leaf count.
*/ new_n->nr_leaves_on_branch = 0; nr_free = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = new_n->slots[slot]; if (!ptr) nr_free++; else if (assoc_array_ptr_is_leaf(ptr)) new_n->nr_leaves_on_branch++; } pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); /* See what we can fold in */ retained = false; next_slot = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_shortcut *s; struct assoc_array_node *child; ptr = new_n->slots[slot]; if (!ptr || assoc_array_ptr_is_leaf(ptr)) continue; s = NULL; if (assoc_array_ptr_is_shortcut(ptr)) { s = assoc_array_ptr_to_shortcut(ptr); ptr = s->next_node; } child = assoc_array_ptr_to_node(ptr); new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; if (child->nr_leaves_on_branch <= nr_free + 1) { /* Fold the child node into this one */ pr_devel("[%d] fold node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); /* We would already have reaped an intervening shortcut * on the way back up the tree. */ BUG_ON(s); new_n->slots[slot] = NULL; nr_free++; if (slot < next_slot) next_slot = slot; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { struct assoc_array_ptr *p = child->slots[i]; if (!p) continue; BUG_ON(assoc_array_ptr_is_meta(p)); while (new_n->slots[next_slot]) next_slot++; BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); new_n->slots[next_slot++] = p; nr_free--; } kfree(child); } else { pr_devel("[%d] retain node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); retained = true; } } if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { pr_devel("internal nodes remain despite enough space, retrying\n"); goto retry_compress; } pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); nr_leaves_on_tree = new_n->nr_leaves_on_branch; /* Excise this node if it is singly occupied by a shortcut */ if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) if ((ptr = new_n->slots[slot])) break; if (assoc_array_ptr_is_meta(ptr) && assoc_array_ptr_is_shortcut(ptr)) { pr_devel("excise node %p with 1 shortcut\n", new_n); new_s = assoc_array_ptr_to_shortcut(ptr); new_parent = new_n->back_pointer; slot = new_n->parent_slot; kfree(new_n); if (!new_parent) { new_s->back_pointer = NULL; new_s->parent_slot = 0; new_root = ptr; goto gc_complete; } if (assoc_array_ptr_is_shortcut(new_parent)) { /* We can discard any preceding shortcut also */ struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(new_parent); pr_devel("excise preceding shortcut\n"); new_parent = new_s->back_pointer = s->back_pointer; slot = new_s->parent_slot = s->parent_slot; kfree(s); if (!new_parent) { new_s->back_pointer = NULL; new_s->parent_slot = 0; new_root = ptr; goto gc_complete; } } new_s->back_pointer = new_parent; new_s->parent_slot = slot; new_n = assoc_array_ptr_to_node(new_parent); new_n->slots[slot] = ptr; goto ascend_old_tree; } } /* Excise any shortcuts we might encounter that point to nodes that * only contain leaves. 
*/ ptr = new_n->back_pointer; if (!ptr) goto gc_complete; if (assoc_array_ptr_is_shortcut(ptr)) { new_s = assoc_array_ptr_to_shortcut(ptr); new_parent = new_s->back_pointer; slot = new_s->parent_slot; if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { struct assoc_array_node *n; pr_devel("excise shortcut\n"); new_n->back_pointer = new_parent; new_n->parent_slot = slot; kfree(new_s); if (!new_parent) { new_root = assoc_array_node_to_ptr(new_n); goto gc_complete; } n = assoc_array_ptr_to_node(new_parent); n->slots[slot] = assoc_array_node_to_ptr(new_n); } } else { new_parent = ptr; } new_n = assoc_array_ptr_to_node(new_parent); ascend_old_tree: ptr = node->back_pointer; if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); slot = shortcut->parent_slot; cursor = shortcut->back_pointer; if (!cursor) goto gc_complete; } else { slot = node->parent_slot; cursor = ptr; } BUG_ON(!cursor); node = assoc_array_ptr_to_node(cursor); slot++; goto continue_node; gc_complete: edit->set[0].to = new_root; assoc_array_apply_edit(edit); array->nr_leaves_on_tree = nr_leaves_on_tree; return 0; enomem: pr_devel("enomem\n"); assoc_array_destroy_subtree(new_root, edit->ops); kfree(edit); return -ENOMEM; } |
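/*
 * Illustrative usage sketch (not part of the original source above): how a
 * caller might drive the edit-script API for deletion.  The array instance
 * "my_index", the method table "my_ops" and the mutex are hypothetical and
 * would be supplied by the user of the library; the point is the pattern
 * documented above: preallocate the edit under the writer lock, apply it
 * (which cannot fail), and let RCU defer destruction of the displaced leaf
 * via ops->free_object().
 */
#include <linux/assoc_array.h>
#include <linux/err.h>
#include <linux/mutex.h>

static struct assoc_array my_index;		/* assumed: zero-initialised / assoc_array_init()'d */
extern const struct assoc_array_ops my_ops;	/* assumed: key/compare/free methods defined elsewhere */
static DEFINE_MUTEX(my_index_lock);		/* serialises modifications to my_index */

static int my_index_remove(const void *index_key)
{
	struct assoc_array_edit *edit;
	int ret = 0;

	mutex_lock(&my_index_lock);
	edit = assoc_array_delete(&my_index, &my_ops, index_key);
	if (IS_ERR(edit)) {
		ret = PTR_ERR(edit);	/* -ENOMEM: the array was not touched */
	} else if (!edit) {
		ret = -ENOENT;		/* no matching object in the array */
	} else {
		/* Guaranteed not to fail; RCU readers may run concurrently. */
		assoc_array_apply_edit(edit);
	}
	mutex_unlock(&my_index_lock);
	return ret;
}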
// SPDX-License-Identifier: GPL-2.0 /* * Implement mseal() syscall. * * Copyright (c) 2023,2024 Google, Inc. * * Author: Jeff Xu <jeffxu@chromium.org> */ #include <linux/mempolicy.h> #include <linux/mman.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/mmu_context.h> #include <linux/syscalls.h> #include <linux/sched.h> #include "internal.h" static inline void set_vma_sealed(struct vm_area_struct *vma) { vm_flags_set(vma, VM_SEALED); } static bool is_madv_discard(int behavior) { switch (behavior) { case MADV_FREE: case MADV_DONTNEED: case MADV_DONTNEED_LOCKED: case MADV_REMOVE: case MADV_DONTFORK: case MADV_WIPEONFORK: return true; } return false; } static bool is_ro_anon(struct vm_area_struct *vma) { /* check anonymous mapping. */ if (vma->vm_file || vma->vm_flags & VM_SHARED) return false; /* * check for non-writable: * PROT=RO or PKRU is not writeable. */ if (!(vma->vm_flags & VM_WRITE) || !arch_vma_access_permitted(vma, true, false, false)) return true; return false; } /* * Check if a vma is allowed to be modified by madvise. */ bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior) { if (!is_madv_discard(behavior)) return true; if (unlikely(!can_modify_vma(vma) && is_ro_anon(vma))) return false; /* Allow by default. */ return true; } static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) { int ret = 0; vm_flags_t oldflags = vma->vm_flags; if (newflags == oldflags) goto out; vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; } set_vma_sealed(vma); out: *prev = vma; return ret; } /* * Check for do_mseal: * 1> start is part of a valid vma. * 2> end is part of a valid vma. * 3> No gap (unallocated address) between start and end. * 4> map is sealable. */ static int check_mm_seal(unsigned long start, unsigned long end) { struct vm_area_struct *vma; unsigned long nstart = start; VMA_ITERATOR(vmi, current->mm, start); /* going through each vma to check. */ for_each_vma_range(vmi, vma, end) { if (vma->vm_start > nstart) /* unallocated memory found. */ return -ENOMEM; if (vma->vm_end >= end) return 0; nstart = vma->vm_end; } return -ENOMEM; } /* * Apply sealing. */ static int apply_mm_seal(unsigned long start, unsigned long end) { unsigned long nstart; struct vm_area_struct *vma, *prev; VMA_ITERATOR(vmi, current->mm, start); vma = vma_iter_load(&vmi); /* * Note: check_mm_seal should have already checked the ENOMEM case,
* so vma should not be null, same for the other ENOMEM cases. */ prev = vma_prev(&vmi); if (start > vma->vm_start) prev = vma; nstart = start; for_each_vma_range(vmi, vma, end) { int error; unsigned long tmp; vm_flags_t newflags; newflags = vma->vm_flags | VM_SEALED; tmp = vma->vm_end; if (tmp > end) tmp = end; error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags); if (error) return error; nstart = vma_iter_end(&vmi); } return 0; } /* * mseal(2) seals the VM's metadata from * selected syscalls. * * addr/len: VM address range. * * The address range given by addr/len must meet: * start (addr) must be in a valid VMA. * end (addr + len) must be in a valid VMA. * no gap (unallocated memory) between start and end. * start (addr) must be page aligned. * * len: len will be page aligned implicitly. * * Below VMA operations are blocked after sealing. * 1> Unmapping, moving to another location, and shrinking * the size, via munmap() and mremap(), can leave an empty * space, therefore can be replaced with a VMA with a new * set of attributes. * 2> Moving or expanding a different vma into the current location, * via mremap(). * 3> Modifying a VMA via mmap(MAP_FIXED). * 4> Size expansion, via mremap(), does not appear to pose any * specific risks to sealed VMAs. It is included anyway because * the use case is unclear. In any case, users can rely on * merging to expand a sealed VMA. * 5> mprotect() and pkey_mprotect(). * 6> Some destructive madvise() behaviors (e.g. MADV_DONTNEED) * for anonymous memory, when users don't have write permission to the * memory. Those behaviors can alter region contents by discarding pages, * effectively a memset(0) for anonymous memory. * * flags: reserved. * * return values: * zero: success. * -EINVAL: * invalid input flags. * start address is not page aligned. * Address range (start + len) overflow. * -ENOMEM: * addr is not a valid address (not allocated). * end (start + len) is not a valid address. * a gap (unallocated memory) between start and end. * -EPERM: * - On 32-bit architectures, sealing is not supported. * Note: * user can call mseal(2) multiple times; adding a seal to * already sealed memory is a no-op (no error). * * unseal() is not supported. */ int do_mseal(unsigned long start, size_t len_in, unsigned long flags) { size_t len; int ret = 0; unsigned long end; struct mm_struct *mm = current->mm; ret = can_do_mseal(flags); if (ret) return ret; start = untagged_addr(start); if (!PAGE_ALIGNED(start)) return -EINVAL; len = PAGE_ALIGN(len_in); /* Check to see whether len was rounded up from small -ve to zero. */ if (len_in && !len) return -EINVAL; end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; if (mmap_write_lock_killable(mm)) return -EINTR; /* * First pass, this helps to avoid * partial sealing in case of error in input address range, * e.g. ENOMEM error. */ ret = check_mm_seal(start, end); if (ret) goto out; /* * Second pass, this should succeed, unless there are errors * from vma_modify_flags, e.g. merge/split error, or process * reaching the max supported VMAs, however, those cases shall * be rare. */ ret = apply_mm_seal(start, end); out: mmap_write_unlock(current->mm); return ret; } SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long, flags) { return do_mseal(start, len, flags); } |
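/*
 * Illustrative user-space sketch (not part of the kernel source above): seal a
 * read-only anonymous mapping and observe that a later munmap() is refused.
 * glibc may not ship an mseal() wrapper, so syscall() is used directly;
 * __NR_mseal is assumed to be provided by the installed UAPI headers, and a
 * 4 KiB page size is assumed for the hard-coded length.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 4096;	/* assumed page size */
	void *p = mmap(NULL, len, PROT_READ,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* start must be page aligned; flags are reserved and must be 0. */
	if (syscall(__NR_mseal, p, len, 0) != 0) {
		perror("mseal");
		return 1;
	}

	/* The VMA is now sealed: unmapping it should fail with EPERM. */
	if (munmap(p, len) != 0)
		printf("munmap on sealed VMA failed as expected: %s\n",
		       strerror(errno));
	return 0;
}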
// SPDX-License-Identifier: GPL-2.0-or-later /* Provide a way to create a superblock configuration context within the kernel * that allows a superblock to be set up prior to mounting. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/magic.h> #include <linux/security.h> #include <linux/mnt_namespace.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <net/net_namespace.h> #include <asm/sections.h> #include "mount.h" #include "internal.h" enum legacy_fs_param { LEGACY_FS_UNSET_PARAMS, LEGACY_FS_MONOLITHIC_PARAMS, LEGACY_FS_INDIVIDUAL_PARAMS, }; struct legacy_fs_context { char *legacy_data; /* Data page for legacy filesystems */ size_t data_size; enum legacy_fs_param param_type; }; static int legacy_init_fs_context(struct fs_context *fc); static const struct constant_table common_set_sb_flag[] = { { "dirsync", SB_DIRSYNC }, { "lazytime", SB_LAZYTIME }, { "mand", SB_MANDLOCK }, { "ro", SB_RDONLY }, { "sync", SB_SYNCHRONOUS }, { }, }; static const struct constant_table common_clear_sb_flag[] = { { "async", SB_SYNCHRONOUS }, { "nolazytime", SB_LAZYTIME }, { "nomand", SB_MANDLOCK }, { "rw", SB_RDONLY }, { }, }; /* * Check for a common mount option that manipulates s_flags. */ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) { unsigned int token; token = lookup_constant(common_set_sb_flag, key, 0); if (token) { fc->sb_flags |= token; fc->sb_flags_mask |= token; return 0; } token = lookup_constant(common_clear_sb_flag, key, 0); if (token) { fc->sb_flags &= ~token; fc->sb_flags_mask |= token; return 0; } return -ENOPARAM; } /** * vfs_parse_fs_param_source - Handle setting "source" via parameter * @fc: The filesystem context to modify * @param: The parameter * * This is a simple helper for filesystems to verify that the "source" they * accept is sane. * * Returns 0 on success, -ENOPARAM if this is not "source" parameter, and * -EINVAL otherwise. In the event of failure, supplementary error information * is logged. */ int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param) { if (strcmp(param->key, "source") != 0) return -ENOPARAM; if (param->type != fs_value_is_string) return invalf(fc, "Non-string source"); if (fc->source) return invalf(fc, "Multiple sources"); fc->source = param->string; param->string = NULL; return 0; } EXPORT_SYMBOL(vfs_parse_fs_param_source); /** * vfs_parse_fs_param - Add a single parameter to a superblock config * @fc: The filesystem context to modify * @param: The parameter * * A single mount option in string form is applied to the filesystem context * being set up. Certain standard options (for example "ro") are translated * into flag bits without going to the filesystem. The active security module * is allowed to observe and poach options. Any other options are passed over * to the filesystem to parse. * * This may be called multiple times for a context. * * Returns 0 on success and a negative error code on failure. In the event of * failure, supplementary error information may have been set. */ int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param) { int ret; if (!param->key) return invalf(fc, "Unnamed parameter\n"); ret = vfs_parse_sb_flag(fc, param->key); if (ret != -ENOPARAM) return ret; ret = security_fs_context_parse_param(fc, param); if (ret != -ENOPARAM) /* Param belongs to the LSM or is disallowed by the LSM; so * don't pass to the FS. 
*/ return ret; if (fc->ops->parse_param) { ret = fc->ops->parse_param(fc, param); if (ret != -ENOPARAM) return ret; } /* If the filesystem doesn't take any arguments, give it the * default handling of source. */ ret = vfs_parse_fs_param_source(fc, param); if (ret != -ENOPARAM) return ret; return invalf(fc, "%s: Unknown parameter '%s'", fc->fs_type->name, param->key); } EXPORT_SYMBOL(vfs_parse_fs_param); /** * vfs_parse_fs_string - Convenience function to just parse a string. * @fc: Filesystem context. * @key: Parameter name. * @value: Default value. * @v_size: Maximum number of bytes in the value. */ int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size) { int ret; struct fs_parameter param = { .key = key, .type = fs_value_is_flag, .size = v_size, }; if (value) { param.string = kmemdup_nul(value, v_size, GFP_KERNEL); if (!param.string) return -ENOMEM; param.type = fs_value_is_string; } ret = vfs_parse_fs_param(fc, ¶m); kfree(param.string); return ret; } EXPORT_SYMBOL(vfs_parse_fs_string); /** * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data * @fc: The superblock configuration to fill in. * @data: The data to parse * @sep: callback for separating next option * * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom * option separator callback. * * Returns 0 on success or the error returned by the ->parse_option() fs_context * operation on failure. */ int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, char *(*sep)(char **)) { char *options = data, *key; int ret = 0; if (!options) return 0; ret = security_sb_eat_lsm_opts(options, &fc->security); if (ret) return ret; while ((key = sep(&options)) != NULL) { if (*key) { size_t v_len = 0; char *value = strchr(key, '='); if (value) { if (value == key) continue; *value++ = 0; v_len = strlen(value); } ret = vfs_parse_fs_string(fc, key, value, v_len); if (ret < 0) break; } } return ret; } EXPORT_SYMBOL(vfs_parse_monolithic_sep); static char *vfs_parse_comma_sep(char **s) { return strsep(s, ","); } /** * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data * @fc: The superblock configuration to fill in. * @data: The data to parse * * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be * called from the ->monolithic_mount_data() fs_context operation. * * Returns 0 on success or the error returned by the ->parse_option() fs_context * operation on failure. */ int generic_parse_monolithic(struct fs_context *fc, void *data) { return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep); } EXPORT_SYMBOL(generic_parse_monolithic); /** * alloc_fs_context - Create a filesystem context. * @fs_type: The filesystem type. * @reference: The dentry from which this one derives (or NULL) * @sb_flags: Filesystem/superblock flags (SB_*) * @sb_flags_mask: Applicable members of @sb_flags * @purpose: The purpose that this configuration shall be used for. * * Open a filesystem and create a mount context. The mount context is * initialised with the supplied flags and, if a submount/automount from * another superblock (referred to by @reference) is supplied, may have * parameters such as namespaces copied across from that superblock. 
*/ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, struct dentry *reference, unsigned int sb_flags, unsigned int sb_flags_mask, enum fs_context_purpose purpose) { int (*init_fs_context)(struct fs_context *); struct fs_context *fc; int ret = -ENOMEM; fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT); if (!fc) return ERR_PTR(-ENOMEM); fc->purpose = purpose; fc->sb_flags = sb_flags; fc->sb_flags_mask = sb_flags_mask; fc->fs_type = get_filesystem(fs_type); fc->cred = get_current_cred(); fc->net_ns = get_net(current->nsproxy->net_ns); fc->log.prefix = fs_type->name; mutex_init(&fc->uapi_mutex); switch (purpose) { case FS_CONTEXT_FOR_MOUNT: fc->user_ns = get_user_ns(fc->cred->user_ns); break; case FS_CONTEXT_FOR_SUBMOUNT: fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); break; case FS_CONTEXT_FOR_RECONFIGURE: atomic_inc(&reference->d_sb->s_active); fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); fc->root = dget(reference); break; } /* TODO: Make all filesystems support this unconditionally */ init_fs_context = fc->fs_type->init_fs_context; if (!init_fs_context) init_fs_context = legacy_init_fs_context; ret = init_fs_context(fc); if (ret < 0) goto err_fc; fc->need_free = true; return fc; err_fc: put_fs_context(fc); return ERR_PTR(ret); } struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, unsigned int sb_flags) { return alloc_fs_context(fs_type, NULL, sb_flags, 0, FS_CONTEXT_FOR_MOUNT); } EXPORT_SYMBOL(fs_context_for_mount); struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, unsigned int sb_flags, unsigned int sb_flags_mask) { return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags, sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE); } EXPORT_SYMBOL(fs_context_for_reconfigure); /** * fs_context_for_submount: allocate a new fs_context for a submount * @type: file_system_type of the new context * @reference: reference dentry from which to copy relevant info * * Allocate a new fs_context suitable for a submount. This also ensures that * the fc->security object is inherited from @reference (if needed). */ struct fs_context *fs_context_for_submount(struct file_system_type *type, struct dentry *reference) { struct fs_context *fc; int ret; fc = alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); if (IS_ERR(fc)) return fc; ret = security_fs_context_submount(fc, reference->d_sb); if (ret) { put_fs_context(fc); return ERR_PTR(ret); } return fc; } EXPORT_SYMBOL(fs_context_for_submount); void fc_drop_locked(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; dput(fc->root); fc->root = NULL; deactivate_locked_super(sb); } static void legacy_fs_context_free(struct fs_context *fc); /** * vfs_dup_fs_context - Duplicate a filesystem context. * @src_fc: The context to copy. 
*/ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) { struct fs_context *fc; int ret; if (!src_fc->ops->dup) return ERR_PTR(-EOPNOTSUPP); fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL); if (!fc) return ERR_PTR(-ENOMEM); mutex_init(&fc->uapi_mutex); fc->fs_private = NULL; fc->s_fs_info = NULL; fc->source = NULL; fc->security = NULL; get_filesystem(fc->fs_type); get_net(fc->net_ns); get_user_ns(fc->user_ns); get_cred(fc->cred); if (fc->log.log) refcount_inc(&fc->log.log->usage); /* Can't call put until we've called ->dup */ ret = fc->ops->dup(fc, src_fc); if (ret < 0) goto err_fc; ret = security_fs_context_dup(fc, src_fc); if (ret < 0) goto err_fc; return fc; err_fc: put_fs_context(fc); return ERR_PTR(ret); } EXPORT_SYMBOL(vfs_dup_fs_context); /** * logfc - Log a message to a filesystem context * @log: The filesystem context to log to, or NULL to use printk. * @prefix: A string to prefix the output with, or NULL. * @level: 'w' for a warning, 'e' for an error. Anything else is a notice. * @fmt: The format of the buffer. */ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...) { va_list va; struct va_format vaf = {.fmt = fmt, .va = &va}; va_start(va, fmt); if (!log) { switch (level) { case 'w': printk(KERN_WARNING "%s%s%pV\n", prefix ? prefix : "", prefix ? ": " : "", &vaf); break; case 'e': printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "", prefix ? ": " : "", &vaf); break; default: printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "", prefix ? ": " : "", &vaf); break; } } else { unsigned int logsize = ARRAY_SIZE(log->buffer); u8 index; char *q = kasprintf(GFP_KERNEL, "%c %s%s%pV\n", level, prefix ? prefix : "", prefix ? ": " : "", &vaf); index = log->head & (logsize - 1); BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) || sizeof(log->tail) != sizeof(u8)); if ((u8)(log->head - log->tail) == logsize) { /* The buffer is full, discard the oldest message */ if (log->need_free & (1 << index)) kfree(log->buffer[index]); log->tail++; } log->buffer[index] = q ? q : "OOM: Can't store error string"; if (q) log->need_free |= 1 << index; else log->need_free &= ~(1 << index); log->head++; } va_end(va); } EXPORT_SYMBOL(logfc); /* * Free a logging structure. */ static void put_fc_log(struct fs_context *fc) { struct fc_log *log = fc->log.log; int i; if (log) { if (refcount_dec_and_test(&log->usage)) { fc->log.log = NULL; for (i = 0; i <= 7; i++) if (log->need_free & (1 << i)) kfree(log->buffer[i]); kfree(log); } } } /** * put_fs_context - Dispose of a superblock configuration context. * @fc: The context to dispose of. */ void put_fs_context(struct fs_context *fc) { struct super_block *sb; if (fc->root) { sb = fc->root->d_sb; dput(fc->root); fc->root = NULL; deactivate_super(sb); } if (fc->need_free && fc->ops && fc->ops->free) fc->ops->free(fc); security_free_mnt_opts(&fc->security); put_net(fc->net_ns); put_user_ns(fc->user_ns); put_cred(fc->cred); put_fc_log(fc); put_filesystem(fc->fs_type); kfree(fc->source); kfree(fc); } EXPORT_SYMBOL(put_fs_context); /* * Free the config for a filesystem that doesn't support fs_context. */ static void legacy_fs_context_free(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; if (ctx) { if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) kfree(ctx->legacy_data); kfree(ctx); } } /* * Duplicate a legacy config. 
*/ static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { struct legacy_fs_context *ctx; struct legacy_fs_context *src_ctx = src_fc->fs_private; ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) { ctx->legacy_data = kmemdup(src_ctx->legacy_data, src_ctx->data_size, GFP_KERNEL); if (!ctx->legacy_data) { kfree(ctx); return -ENOMEM; } } fc->fs_private = ctx; return 0; } /* * Add a parameter to a legacy config. We build up a comma-separated list of * options. */ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct legacy_fs_context *ctx = fc->fs_private; unsigned int size = ctx->data_size; size_t len = 0; int ret; ret = vfs_parse_fs_param_source(fc, param); if (ret != -ENOPARAM) return ret; if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); switch (param->type) { case fs_value_is_string: len = 1 + param->size; fallthrough; case fs_value_is_flag: len += strlen(param->key); break; default: return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported", param->key); } if (size + len + 2 > PAGE_SIZE) return invalf(fc, "VFS: Legacy: Cumulative options too large"); if (strchr(param->key, ',') || (param->type == fs_value_is_string && memchr(param->string, ',', param->size))) return invalf(fc, "VFS: Legacy: Option '%s' contained comma", param->key); if (!ctx->legacy_data) { ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!ctx->legacy_data) return -ENOMEM; } if (size) ctx->legacy_data[size++] = ','; len = strlen(param->key); memcpy(ctx->legacy_data + size, param->key, len); size += len; if (param->type == fs_value_is_string) { ctx->legacy_data[size++] = '='; memcpy(ctx->legacy_data + size, param->string, param->size); size += param->size; } ctx->legacy_data[size] = '\0'; ctx->data_size = size; ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS; return 0; } /* * Add monolithic mount data. */ static int legacy_parse_monolithic(struct fs_context *fc, void *data) { struct legacy_fs_context *ctx = fc->fs_private; if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) { pr_warn("VFS: Can't mix monolithic and individual options\n"); return -EINVAL; } ctx->legacy_data = data; ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS; if (!ctx->legacy_data) return 0; if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) return 0; return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security); } /* * Get a mountable root with the legacy mount command. */ static int legacy_get_tree(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; struct super_block *sb; struct dentry *root; root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, fc->source, ctx->legacy_data); if (IS_ERR(root)) return PTR_ERR(root); sb = root->d_sb; BUG_ON(!sb); fc->root = root; return 0; } /* * Handle remount. */ static int legacy_reconfigure(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; if (!sb->s_op->remount_fs) return 0; return sb->s_op->remount_fs(sb, &fc->sb_flags, ctx ? 
ctx->legacy_data : NULL); } const struct fs_context_operations legacy_fs_context_ops = { .free = legacy_fs_context_free, .dup = legacy_fs_context_dup, .parse_param = legacy_parse_param, .parse_monolithic = legacy_parse_monolithic, .get_tree = legacy_get_tree, .reconfigure = legacy_reconfigure, }; /* * Initialise a legacy context for a filesystem that doesn't support * fs_context. */ static int legacy_init_fs_context(struct fs_context *fc) { fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); if (!fc->fs_private) return -ENOMEM; fc->ops = &legacy_fs_context_ops; return 0; } int parse_monolithic_mount_data(struct fs_context *fc, void *data) { int (*monolithic_mount_data)(struct fs_context *, void *); monolithic_mount_data = fc->ops->parse_monolithic; if (!monolithic_mount_data) monolithic_mount_data = generic_parse_monolithic; return monolithic_mount_data(fc, data); } /* * Clean up a context after performing an action on it and put it into a state * from where it can be used to reconfigure a superblock. * * Note that here we do only the parts that can't fail; the rest is in * finish_clean_context() below and in between those fs_context is marked * FS_CONTEXT_AWAITING_RECONF. The reason for splitup is that after * successful mount or remount we need to report success to userland. * Trying to do full reinit (for the sake of possible subsequent remount) * and failing to allocate memory would've put us into a nasty situation. * So here we only discard the old state and reinitialization is left * until we actually try to reconfigure. */ void vfs_clean_context(struct fs_context *fc) { if (fc->need_free && fc->ops && fc->ops->free) fc->ops->free(fc); fc->need_free = false; fc->fs_private = NULL; fc->s_fs_info = NULL; fc->sb_flags = 0; security_free_mnt_opts(&fc->security); kfree(fc->source); fc->source = NULL; fc->exclusive = false; fc->purpose = FS_CONTEXT_FOR_RECONFIGURE; fc->phase = FS_CONTEXT_AWAITING_RECONF; } int finish_clean_context(struct fs_context *fc) { int error; if (fc->phase != FS_CONTEXT_AWAITING_RECONF) return 0; if (fc->fs_type->init_fs_context) error = fc->fs_type->init_fs_context(fc); else error = legacy_init_fs_context(fc); if (unlikely(error)) { fc->phase = FS_CONTEXT_FAILED; return error; } fc->need_free = true; fc->phase = FS_CONTEXT_RECONF_PARAMS; return 0; } |
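/*
 * Illustrative sketch (not part of the original source above): the minimal
 * shape of a filesystem that opts in to the fs_context API instead of the
 * legacy fallback.  "examplefs", its context structure and the "depth" option
 * are made up for the example; fsparam_u32()/fs_parse() are the standard
 * <linux/fs_parser.h> helpers.  Returning -ENOPARAM from ->parse_param lets
 * vfs_parse_fs_param() fall back to its default handling of "source".
 */
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/slab.h>

struct examplefs_ctx {
	unsigned int	depth;			/* hypothetical mount option */
};

enum examplefs_param { Opt_depth };

static const struct fs_parameter_spec examplefs_param_specs[] = {
	fsparam_u32("depth", Opt_depth),
	{}
};

static int examplefs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct examplefs_ctx *ctx = fc->fs_private;
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, examplefs_param_specs, param, &result);
	if (opt < 0)
		return opt;		/* includes -ENOPARAM for unknown keys */

	switch (opt) {
	case Opt_depth:
		ctx->depth = result.uint_32;
		break;
	}
	return 0;
}

static void examplefs_free_fc(struct fs_context *fc)
{
	kfree(fc->fs_private);
}

static const struct fs_context_operations examplefs_context_ops = {
	.free		= examplefs_free_fc,
	.parse_param	= examplefs_parse_param,
	/* .get_tree would create and populate the superblock; omitted here. */
};

/* Hooked up via struct file_system_type::init_fs_context. */
static int examplefs_init_fs_context(struct fs_context *fc)
{
	fc->fs_private = kzalloc(sizeof(struct examplefs_ctx), GFP_KERNEL);
	if (!fc->fs_private)
		return -ENOMEM;
	fc->ops = &examplefs_context_ops;
	return 0;
}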
// SPDX-License-Identifier: GPL-2.0 /* Bareudp: UDP tunnel encapsulation for different payload types like * MPLS, NSH, IP, etc. * Copyright (c) 2019 Nokia, Inc.
* Authors: Martin Varghese, <martin.varghese@nokia.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/hash.h> #include <net/dst_metadata.h> #include <net/gro_cells.h> #include <net/rtnetlink.h> #include <net/protocol.h> #include <net/ip6_tunnel.h> #include <net/ip_tunnels.h> #include <net/udp_tunnel.h> #include <net/bareudp.h> #define BAREUDP_BASE_HLEN sizeof(struct udphdr) #define BAREUDP_IPV4_HLEN (sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define BAREUDP_IPV6_HLEN (sizeof(struct ipv6hdr) + \ sizeof(struct udphdr)) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); /* per-network namespace private data for this module */ static unsigned int bareudp_net_id; struct bareudp_net { struct list_head bareudp_list; }; struct bareudp_conf { __be16 ethertype; __be16 port; u16 sport_min; bool multi_proto_mode; }; /* Pseudo network device */ struct bareudp_dev { struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for bareudp tunnel */ __be16 ethertype; __be16 port; u16 sport_min; bool multi_proto_mode; struct socket __rcu *sock; struct list_head next; /* bareudp node on namespace list */ struct gro_cells gro_cells; }; static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; IP_TUNNEL_DECLARE_FLAGS(key) = { }; struct bareudp_dev *bareudp; unsigned short family; unsigned int len; __be16 proto; void *oiph; int err; int nh; bareudp = rcu_dereference_sk_user_data(sk); if (!bareudp) goto drop; if (skb->protocol == htons(ETH_P_IP)) family = AF_INET; else family = AF_INET6; if (bareudp->ethertype == htons(ETH_P_IP)) { __u8 ipversion; if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion, sizeof(ipversion))) { DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } ipversion >>= 4; if (ipversion == 4) { proto = htons(ETH_P_IP); } else if (ipversion == 6 && bareudp->multi_proto_mode) { proto = htons(ETH_P_IPV6); } else { DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } else if (bareudp->ethertype == htons(ETH_P_MPLS_UC)) { struct iphdr *tunnel_hdr; tunnel_hdr = (struct iphdr *)skb_network_header(skb); if (tunnel_hdr->version == 4) { if (!ipv4_is_multicast(tunnel_hdr->daddr)) { proto = bareudp->ethertype; } else if (bareudp->multi_proto_mode && ipv4_is_multicast(tunnel_hdr->daddr)) { proto = htons(ETH_P_MPLS_MC); } else { DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } else { int addr_type; struct ipv6hdr *tunnel_hdr_v6; tunnel_hdr_v6 = (struct ipv6hdr *)skb_network_header(skb); addr_type = ipv6_addr_type((struct in6_addr *)&tunnel_hdr_v6->daddr); if (!(addr_type & IPV6_ADDR_MULTICAST)) { proto = bareudp->ethertype; } else if (bareudp->multi_proto_mode && (addr_type & IPV6_ADDR_MULTICAST)) { proto = htons(ETH_P_MPLS_MC); } else { DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } } else { proto = bareudp->ethertype; } if (iptunnel_pull_header(skb, BAREUDP_BASE_HLEN, proto, !net_eq(bareudp->net, dev_net(bareudp->dev)))) { DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } __set_bit(IP_TUNNEL_KEY_BIT, key); tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0); if (!tun_dst) { DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } skb_dst_set(skb, &tun_dst->dst); skb->dev = bareudp->dev; skb_reset_mac_header(skb); /* Save offset of outer header relative to skb->head, * because we are going to reset the network header 
to the inner header * and might change skb->head. */ nh = skb_network_header(skb) - skb->head; skb_reset_network_header(skb); if (!pskb_inet_may_pull(skb)) { DEV_STATS_INC(bareudp->dev, rx_length_errors); DEV_STATS_INC(bareudp->dev, rx_errors); goto drop; } /* Get the outer header. */ oiph = skb->head + nh; if (!ipv6_mod_enabled() || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); else err = IP6_ECN_decapsulate(oiph, skb); if (unlikely(err)) { if (log_ecn_error) { if (!ipv6_mod_enabled() || family == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, ((struct iphdr *)oiph)->tos); else net_info_ratelimited("non-ECT from %pI6\n", &((struct ipv6hdr *)oiph)->saddr); } if (err > 1) { DEV_STATS_INC(bareudp->dev, rx_frame_errors); DEV_STATS_INC(bareudp->dev, rx_errors); goto drop; } } len = skb->len; err = gro_cells_receive(&bareudp->gro_cells, skb); if (likely(err == NET_RX_SUCCESS)) dev_sw_netstats_rx_add(bareudp->dev, len); return 0; drop: /* Consume bad packet */ kfree_skb(skb); return 0; } static int bareudp_err_lookup(struct sock *sk, struct sk_buff *skb) { return 0; } static int bareudp_init(struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); int err; err = gro_cells_init(&bareudp->gro_cells, dev); if (err) return err; return 0; } static void bareudp_uninit(struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); gro_cells_destroy(&bareudp->gro_cells); } static struct socket *bareudp_create_sock(struct net *net, __be16 port) { struct udp_port_cfg udp_conf; struct socket *sock; int err; memset(&udp_conf, 0, sizeof(udp_conf)); if (ipv6_mod_enabled()) udp_conf.family = AF_INET6; else udp_conf.family = AF_INET; udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); if (err < 0) return ERR_PTR(err); udp_allow_gso(sock->sk); return sock; } /* Create new listen socket if needed */ static int bareudp_socket_create(struct bareudp_dev *bareudp, __be16 port) { struct udp_tunnel_sock_cfg tunnel_cfg; struct socket *sock; sock = bareudp_create_sock(bareudp->net, port); if (IS_ERR(sock)) return PTR_ERR(sock); /* Mark socket as an encapsulation socket */ memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); tunnel_cfg.sk_user_data = bareudp; tunnel_cfg.encap_type = 1; tunnel_cfg.encap_rcv = bareudp_udp_encap_recv; tunnel_cfg.encap_err_lookup = bareudp_err_lookup; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(bareudp->net, sock, &tunnel_cfg); rcu_assign_pointer(bareudp->sock, sock); return 0; } static int bareudp_open(struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); int ret = 0; ret = bareudp_socket_create(bareudp, bareudp->port); return ret; } static void bareudp_sock_release(struct bareudp_dev *bareudp) { struct socket *sock; sock = bareudp->sock; rcu_assign_pointer(bareudp->sock, NULL); synchronize_net(); udp_tunnel_sock_release(sock); } static int bareudp_stop(struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); bareudp_sock_release(bareudp); return 0; } static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) { bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct socket *sock = rcu_dereference(bareudp->sock); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; __be16 sport, df; int min_headroom; 
__u8 tos, ttl; __be32 saddr; int err; if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) return -EINVAL; if (!sock) return -ESHUTDOWN; sport = udp_flow_src_port(bareudp->net, skb, bareudp->sport_min, USHRT_MAX, true); rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key, sport, bareudp->port, key->tos, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); skb_tunnel_check_pmtu(skb, &rt->dst, BAREUDP_IPV4_HLEN + info->options_len, false); tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? htons(IP_DF) : 0; skb_scrub_packet(skb, xnet); err = -ENOSPC; if (!skb_pull(skb, skb_network_offset(skb))) goto free_dst; min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) goto free_dst; err = udp_tunnel_handle_offloads(skb, udp_sum); if (err) goto free_dst; skb_set_inner_protocol(skb, bareudp->ethertype); udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, bareudp->port, !net_eq(bareudp->net, dev_net(bareudp->dev)), !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)); return 0; free_dst: dst_release(&rt->dst); return err; } static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) { bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct socket *sock = rcu_dereference(bareudp->sock); const struct ip_tunnel_key *key = &info->key; struct dst_entry *dst = NULL; struct in6_addr saddr, daddr; int min_headroom; __u8 prio, ttl; __be16 sport; int err; if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) return -EINVAL; if (!sock) return -ESHUTDOWN; sport = udp_flow_src_port(bareudp->net, skb, bareudp->sport_min, USHRT_MAX, true); dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, key, sport, bareudp->port, key->tos, use_cache ? 
(struct dst_cache *) &info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len, false); prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; skb_scrub_packet(skb, xnet); err = -ENOSPC; if (!skb_pull(skb, skb_network_offset(skb))) goto free_dst; min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct ipv6hdr); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) goto free_dst; err = udp_tunnel_handle_offloads(skb, udp_sum); if (err) goto free_dst; daddr = info->key.u.ipv6.dst; udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev, &saddr, &daddr, prio, ttl, info->key.label, sport, bareudp->port, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)); return 0; free_dst: dst_release(dst); return err; } static bool bareudp_proto_valid(struct bareudp_dev *bareudp, __be16 proto) { if (bareudp->ethertype == proto) return true; if (!bareudp->multi_proto_mode) return false; if (bareudp->ethertype == htons(ETH_P_MPLS_UC) && proto == htons(ETH_P_MPLS_MC)) return true; if (bareudp->ethertype == htons(ETH_P_IP) && proto == htons(ETH_P_IPV6)) return true; return false; } static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); struct ip_tunnel_info *info = NULL; int err; if (!bareudp_proto_valid(bareudp, skb->protocol)) { err = -EINVAL; goto tx_error; } info = skb_tunnel_info(skb); if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { err = -EINVAL; goto tx_error; } rcu_read_lock(); if (ipv6_mod_enabled() && info->mode & IP_TUNNEL_INFO_IPV6) err = bareudp6_xmit_skb(skb, dev, bareudp, info); else err = bareudp_xmit_skb(skb, dev, bareudp, info); rcu_read_unlock(); if (likely(!err)) return NETDEV_TX_OK; tx_error: dev_kfree_skb(skb); if (err == -ELOOP) DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) DEV_STATS_INC(dev, tx_carrier_errors); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static int bareudp_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct bareudp_dev *bareudp = netdev_priv(dev); bool use_cache; __be16 sport; use_cache = ip_tunnel_dst_cache_usable(skb, info); sport = udp_flow_src_port(bareudp->net, skb, bareudp->sport_min, USHRT_MAX, true); if (!ipv6_mod_enabled() || ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; __be32 saddr; rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key, sport, bareudp->port, info->key.tos, use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); info->key.u.ipv4.src = saddr; } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct in6_addr saddr; struct socket *sock = rcu_dereference(bareudp->sock); dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, &info->key, sport, bareudp->port, info->key.tos, use_cache ? 
&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); dst_release(dst); info->key.u.ipv6.src = saddr; } else { return -EINVAL; } info->key.tp_src = sport; info->key.tp_dst = bareudp->port; return 0; } static const struct net_device_ops bareudp_netdev_ops = { .ndo_init = bareudp_init, .ndo_uninit = bareudp_uninit, .ndo_open = bareudp_open, .ndo_stop = bareudp_stop, .ndo_start_xmit = bareudp_xmit, .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, }; static const struct nla_policy bareudp_policy[IFLA_BAREUDP_MAX + 1] = { [IFLA_BAREUDP_PORT] = { .type = NLA_U16 }, [IFLA_BAREUDP_ETHERTYPE] = { .type = NLA_U16 }, [IFLA_BAREUDP_SRCPORT_MIN] = { .type = NLA_U16 }, [IFLA_BAREUDP_MULTIPROTO_MODE] = { .type = NLA_FLAG }, }; /* Info for udev, that this is a virtual tunnel endpoint */ static const struct device_type bareudp_type = { .name = "bareudp", }; /* Initialize the device structure. */ static void bareudp_setup(struct net_device *dev) { dev->netdev_ops = &bareudp_netdev_ops; dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &bareudp_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hard_header_len = 0; dev->addr_len = 0; dev->mtu = ETH_DATA_LEN; dev->min_mtu = IPV4_MIN_MTU; dev->max_mtu = IP_MAX_MTU - BAREUDP_BASE_HLEN; dev->type = ARPHRD_NONE; netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; } static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (!data) { NL_SET_ERR_MSG(extack, "Not enough attributes provided to perform the operation"); return -EINVAL; } return 0; } static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf, struct netlink_ext_ack *extack) { memset(conf, 0, sizeof(*conf)); if (!data[IFLA_BAREUDP_PORT]) { NL_SET_ERR_MSG(extack, "port not specified"); return -EINVAL; } if (!data[IFLA_BAREUDP_ETHERTYPE]) { NL_SET_ERR_MSG(extack, "ethertype not specified"); return -EINVAL; } conf->port = nla_get_u16(data[IFLA_BAREUDP_PORT]); conf->ethertype = nla_get_u16(data[IFLA_BAREUDP_ETHERTYPE]); if (data[IFLA_BAREUDP_SRCPORT_MIN]) conf->sport_min = nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]); if (data[IFLA_BAREUDP_MULTIPROTO_MODE]) conf->multi_proto_mode = true; return 0; } static struct bareudp_dev *bareudp_find_dev(struct bareudp_net *bn, const struct bareudp_conf *conf) { struct bareudp_dev *bareudp, *t = NULL; list_for_each_entry(bareudp, &bn->bareudp_list, next) { if (conf->port == bareudp->port) t = bareudp; } return t; } static int bareudp_configure(struct net *net, struct net_device *dev, struct bareudp_conf *conf, struct netlink_ext_ack *extack) { struct bareudp_net *bn = net_generic(net, bareudp_net_id); struct bareudp_dev *t, *bareudp = netdev_priv(dev); int err; bareudp->net = net; bareudp->dev = dev; t = bareudp_find_dev(bn, conf); if (t) { NL_SET_ERR_MSG(extack, "Another bareudp device using the same port already exists"); return -EBUSY; } if (conf->multi_proto_mode && (conf->ethertype != htons(ETH_P_MPLS_UC) && conf->ethertype != htons(ETH_P_IP))) { NL_SET_ERR_MSG(extack, "Cannot set multiproto mode for this ethertype (only IPv4 and unicast MPLS are supported)"); return -EINVAL; } bareudp->port = conf->port; 
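/* Record the validated configuration in the device private data before registering the netdevice. */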
bareudp->ethertype = conf->ethertype; bareudp->sport_min = conf->sport_min; bareudp->multi_proto_mode = conf->multi_proto_mode; err = register_netdevice(dev); if (err) return err; list_add(&bareudp->next, &bn->bareudp_list); return 0; } static int bareudp_link_config(struct net_device *dev, struct nlattr *tb[]) { int err; if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err) return err; } return 0; } static void bareudp_dellink(struct net_device *dev, struct list_head *head) { struct bareudp_dev *bareudp = netdev_priv(dev); list_del(&bareudp->next); unregister_netdevice_queue(dev, head); } static int bareudp_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bareudp_conf conf; int err; err = bareudp2info(data, &conf, extack); if (err) return err; err = bareudp_configure(net, dev, &conf, extack); if (err) return err; err = bareudp_link_config(dev, tb); if (err) goto err_unconfig; return 0; err_unconfig: bareudp_dellink(dev, NULL); return err; } static size_t bareudp_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_PORT */ nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_ETHERTYPE */ nla_total_size(sizeof(__u16)) + /* IFLA_BAREUDP_SRCPORT_MIN */ nla_total_size(0) + /* IFLA_BAREUDP_MULTIPROTO_MODE */ 0; } static int bareudp_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); if (nla_put_be16(skb, IFLA_BAREUDP_PORT, bareudp->port)) goto nla_put_failure; if (nla_put_be16(skb, IFLA_BAREUDP_ETHERTYPE, bareudp->ethertype)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BAREUDP_SRCPORT_MIN, bareudp->sport_min)) goto nla_put_failure; if (bareudp->multi_proto_mode && nla_put_flag(skb, IFLA_BAREUDP_MULTIPROTO_MODE)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops bareudp_link_ops __read_mostly = { .kind = "bareudp", .maxtype = IFLA_BAREUDP_MAX, .policy = bareudp_policy, .priv_size = sizeof(struct bareudp_dev), .setup = bareudp_setup, .validate = bareudp_validate, .newlink = bareudp_newlink, .dellink = bareudp_dellink, .get_size = bareudp_get_size, .fill_info = bareudp_fill_info, }; static __net_init int bareudp_init_net(struct net *net) { struct bareudp_net *bn = net_generic(net, bareudp_net_id); INIT_LIST_HEAD(&bn->bareudp_list); return 0; } static void bareudp_destroy_tunnels(struct net *net, struct list_head *head) { struct bareudp_net *bn = net_generic(net, bareudp_net_id); struct bareudp_dev *bareudp, *next; list_for_each_entry_safe(bareudp, next, &bn->bareudp_list, next) unregister_netdevice_queue(bareudp->dev, head); } static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_kill_list) { struct net *net; list_for_each_entry(net, net_list, exit_list) bareudp_destroy_tunnels(net, dev_kill_list); } static struct pernet_operations bareudp_net_ops = { .init = bareudp_init_net, .exit_batch_rtnl = bareudp_exit_batch_rtnl, .id = &bareudp_net_id, .size = sizeof(struct bareudp_net), }; static int __init bareudp_init_module(void) { int rc; rc = register_pernet_subsys(&bareudp_net_ops); if (rc) goto out1; rc = rtnl_link_register(&bareudp_link_ops); if (rc) goto out2; return 0; out2: unregister_pernet_subsys(&bareudp_net_ops); out1: return rc; } late_initcall(bareudp_init_module); static void __exit bareudp_cleanup_module(void) { rtnl_link_unregister(&bareudp_link_ops); 
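/* Module teardown mirrors bareudp_init_module() in reverse: the rtnl link ops are removed first, then the pernet subsystem. */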
unregister_pernet_subsys(&bareudp_net_ops); } module_exit(bareudp_cleanup_module); MODULE_ALIAS_RTNL_LINK("bareudp"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Varghese <martin.varghese@nokia.com>"); MODULE_DESCRIPTION("Interface driver for UDP encapsulated traffic"); |
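/*
 * Illustrative sketch only -- not part of bareudp.c.  It re-implements the
 * ethertype check of bareudp_proto_valid() above as a small userspace
 * program so the multiproto special cases (an MPLS unicast device also
 * accepting MPLS multicast, an IPv4 device also accepting IPv6) can be
 * exercised in isolation.  The ETH_P_* values are copied from
 * <linux/if_ether.h>; everything else here is hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ETH_P_IP	0x0800	/* IPv4 */
#define ETH_P_IPV6	0x86DD	/* IPv6 */
#define ETH_P_MPLS_UC	0x8847	/* MPLS unicast */
#define ETH_P_MPLS_MC	0x8848	/* MPLS multicast */

/* Host-order mirror of the bareudp_proto_valid() logic */
static bool proto_valid(uint16_t dev_ethertype, bool multi_proto_mode,
			uint16_t pkt_proto)
{
	if (dev_ethertype == pkt_proto)
		return true;
	if (!multi_proto_mode)
		return false;
	if (dev_ethertype == ETH_P_MPLS_UC && pkt_proto == ETH_P_MPLS_MC)
		return true;
	if (dev_ethertype == ETH_P_IP && pkt_proto == ETH_P_IPV6)
		return true;
	return false;
}

int main(void)
{
	/* IPv6 over an IPv4 device is only accepted in multiproto mode */
	printf("ipv4 dev, ipv6 pkt, multiproto off: %d\n",
	       proto_valid(ETH_P_IP, false, ETH_P_IPV6));
	printf("ipv4 dev, ipv6 pkt, multiproto on:  %d\n",
	       proto_valid(ETH_P_IP, true, ETH_P_IPV6));
	/* MPLS multicast piggy-backs on an MPLS unicast device */
	printf("mpls dev, mpls mcast, multiproto on: %d\n",
	       proto_valid(ETH_P_MPLS_UC, true, ETH_P_MPLS_MC));
	return 0;
}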
// SPDX-License-Identifier: GPL-2.0 /* * HID driver for Maltron L90 * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2012 David Dillow <dave@thedillows.org> * Copyright (c) 2006-2013 Jiri Kosina * Copyright (c) 2013 Colin Leitner <colin.leitner@gmail.com> * Copyright (c) 2014-2016 Frank Praznik <frank.praznik@gmail.com> * Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com> * Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com> * Copyright (c) 2018 William Whistler <wtbw@wtbw.co.uk> */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" /* The original buggy USB descriptor */ static const u8 maltron_rdesc_o[] = { 0x05, 0x01, /* Usage Page (Generic Desktop Ctrls) */ 0x09, 0x80, /* Usage (Sys Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x02, /* Report ID (2) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x01, /* Report Count (1) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x83, /* Usage (Sys Wake Up) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x75, 0x05, /* Report Size (5) */ 0x81, 0x01, /* Input (Const,Array,Abs) */ 0xC0, /* End Collection */ 0x05, 0x0C, /* Usage Page (Consumer) */ 0x09, 0x01, /* Usage (Consumer Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x03, /* Report ID (3) */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (Unassigned) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0xC0, /* End Collection */ 0x06, 0x7F, 0xFF, /* Usage Page (Vendor Defined 0xFF7F) */ 0x09, 0x01, /* Usage (0x01) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x04, /* Report ID (4) */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (0x00) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0x75, 0x02, /* Report Size (2) */ 0x25, 0x02, /* Logical Maximum (2) */ 0x09, 0x90, /* Usage (0x90) */ 0xB1, 0x02, /* Feature (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0xB1, 0x01, /* Feature (Const,Array,Abs) */ 0x75, 0x01, /* Report Size (1) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x05, 0x08, /* Usage Page (LEDs) */ 0x09, 0x2A, /* Usage (On-Line) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x09, 0x4B, /* Usage (Generic Indicator) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0x95, 0x01, /* Report Count (1) */ 0x91, 0x01, /* Output (Const,Array,Abs) */ 0xC0 /* End Collection */ }; /* The patched descriptor, allowing media key events to be accepted as valid */ static const u8 maltron_rdesc[] = { 0x05, 0x01,
/* Usage Page (Generic Desktop Ctrls) */ 0x09, 0x80, /* Usage (Sys Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x02, /* Report ID (2) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x01, /* Report Count (1) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x83, /* Usage (Sys Wake Up) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x75, 0x05, /* Report Size (5) */ 0x81, 0x01, /* Input (Const,Array,Abs) */ 0xC0, /* End Collection */ 0x05, 0x0C, /* Usage Page (Consumer) */ 0x09, 0x01, /* Usage (Consumer Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x03, /* Report ID (3) */ 0x15, 0x00, /* Logical Minimum (0) - changed */ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767) - changed */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (Unassigned) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0xC0, /* End Collection */ 0x06, 0x7F, 0xFF, /* Usage Page (Vendor Defined 0xFF7F) */ 0x09, 0x01, /* Usage (0x01) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x04, /* Report ID (4) */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (0x00) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0x75, 0x02, /* Report Size (2) */ 0x25, 0x02, /* Logical Maximum (2) */ 0x09, 0x90, /* Usage (0x90) */ 0xB1, 0x02, /* Feature (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0xB1, 0x01, /* Feature (Const,Array,Abs) */ 0x75, 0x01, /* Report Size (1) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x05, 0x08, /* Usage Page (LEDs) */ 0x09, 0x2A, /* Usage (On-Line) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x09, 0x4B, /* Usage (Generic Indicator) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0x95, 0x01, /* Report Count (1) */ 0x91, 0x01, /* Output (Const,Array,Abs) */ 0xC0 /* End Collection */ }; static const __u8 *maltron_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize == sizeof(maltron_rdesc_o) && !memcmp(maltron_rdesc_o, rdesc, sizeof(maltron_rdesc_o))) { hid_info(hdev, "Replacing Maltron L90 keyboard report descriptor\n"); *rsize = sizeof(maltron_rdesc); return maltron_rdesc; } return rdesc; } static const struct hid_device_id maltron_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_MALTRON_KB)}, { } }; MODULE_DEVICE_TABLE(hid, maltron_devices); static struct hid_driver maltron_driver = { .name = "maltron", .id_table = maltron_devices, .report_fixup = maltron_report_fixup }; module_hid_driver(maltron_driver); MODULE_DESCRIPTION("HID driver for Maltron L90"); MODULE_LICENSE("GPL"); |
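/*
 * Illustrative sketch only -- not part of hid-maltron.c.  It shows the
 * report_fixup pattern used by maltron_report_fixup() above in a small
 * userspace program: a descriptor is swapped for the patched version only
 * on an exact size and byte-for-byte match, anything else passes through
 * untouched.  The two short arrays stand in for maltron_rdesc_o and
 * maltron_rdesc and are hypothetical.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static const unsigned char buggy_rdesc[]   = { 0x05, 0x0C, 0x09, 0x01, 0xA1, 0x01 };
static const unsigned char patched_rdesc[] = { 0x05, 0x0C, 0x09, 0x01, 0xA1, 0x01,
					       0x15, 0x00, 0x26, 0xFF, 0x7F };

static const unsigned char *fixup(const unsigned char *rdesc, size_t *rsize)
{
	/* Replace only an exact match, mirroring the memcmp() check above */
	if (*rsize == sizeof(buggy_rdesc) &&
	    !memcmp(rdesc, buggy_rdesc, sizeof(buggy_rdesc))) {
		*rsize = sizeof(patched_rdesc);
		return patched_rdesc;
	}
	return rdesc;
}

int main(void)
{
	size_t len = sizeof(buggy_rdesc);
	const unsigned char *out = fixup(buggy_rdesc, &len);

	printf("patched: %s, new size: %zu\n",
	       out == patched_rdesc ? "yes" : "no", len);
	return 0;
}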
// SPDX-License-Identifier: GPL-2.0-or-later /* * GRE over IPv6 protocol decoder. * * Authors: Dmitry Kozlov (xeb@mail.ru) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/hash.h> #include <linux/if_tunnel.h> #include <linux/ip6_tunnel.h> #include <net/sock.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/addrconf.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/ip6_tunnel.h> #include <net/gre.h> #include <net/erspan.h> #include <net/dst_metadata.h> static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define IP6_GRE_HASH_SIZE_SHIFT 5 #define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) static unsigned int ip6gre_net_id __read_mostly; struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct ip6_tnl __rcu *collect_md_tun; struct ip6_tnl __rcu *collect_md_tun_erspan; struct net_device *fb_tunnel_dev; }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu); /* Tunnel hash table */ /* 4 hash tables: 3: (remote,local) 2: (remote,*) 1: (*,local) 0: (*,*) We require exact key match i.e.
if a key is present in packet it will match only tunnel with the same key; if it is not present, it will match only keyless tunnel. All keysless packets, if not matched configured keyless tunnels will match fallback tunnel. */ #define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1)) static u32 HASH_ADDR(const struct in6_addr *addr) { u32 hash = ipv6_addr_hash(addr); return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT); } #define tunnels_r_l tunnels[3] #define tunnels_r tunnels[2] #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, const struct in6_addr *remote, const struct in6_addr *local, __be32 key, __be16 gre_proto) { struct net *net = dev_net(dev); int link = dev->ifindex; unsigned int h0 = HASH_ADDR(remote); unsigned int h1 = HASH_KEY(key); struct ip6_tnl *t, *cand = NULL; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); int dev_type = (gre_proto == htons(ETH_P_TEB) || gre_proto == htons(ETH_P_ERSPAN) || gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; struct net_device *ndev; for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_equal(remote, &t->parms.raddr) || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; if (t->dev->type != ARPHRD_IP6GRE && t->dev->type != dev_type) continue; score = 0; if (t->parms.link != link) score |= 1; if (t->dev->type != dev_type) score |= 2; if (score == 0) return t; if (score < cand_score) { cand = t; cand_score = score; } } for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { if (!ipv6_addr_equal(remote, &t->parms.raddr) || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; if (t->dev->type != ARPHRD_IP6GRE && t->dev->type != dev_type) continue; score = 0; if (t->parms.link != link) score |= 1; if (t->dev->type != dev_type) score |= 2; if (score == 0) return t; if (score < cand_score) { cand = t; cand_score = score; } } for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { if ((!ipv6_addr_equal(local, &t->parms.laddr) && (!ipv6_addr_equal(local, &t->parms.raddr) || !ipv6_addr_is_multicast(local))) || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; if (t->dev->type != ARPHRD_IP6GRE && t->dev->type != dev_type) continue; score = 0; if (t->parms.link != link) score |= 1; if (t->dev->type != dev_type) score |= 2; if (score == 0) return t; if (score < cand_score) { cand = t; cand_score = score; } } for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { if (t->parms.i_key != key || !(t->dev->flags & IFF_UP)) continue; if (t->dev->type != ARPHRD_IP6GRE && t->dev->type != dev_type) continue; score = 0; if (t->parms.link != link) score |= 1; if (t->dev->type != dev_type) score |= 2; if (score == 0) return t; if (score < cand_score) { cand = t; cand_score = score; } } if (cand) return cand; if (gre_proto == htons(ETH_P_ERSPAN) || gre_proto == htons(ETH_P_ERSPAN2)) t = rcu_dereference(ign->collect_md_tun_erspan); else t = rcu_dereference(ign->collect_md_tun); if (t && t->dev->flags & IFF_UP) return t; ndev = READ_ONCE(ign->fb_tunnel_dev); if (ndev && ndev->flags & IFF_UP) return netdev_priv(ndev); return NULL; } static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; unsigned int h = HASH_KEY(p->i_key); int prio = 0; if 
(!ipv6_addr_any(local)) prio |= 1; if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { prio |= 2; h ^= HASH_ADDR(remote); } return &ign->tunnels[prio][h]; } static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) { if (t->parms.collect_md) rcu_assign_pointer(ign->collect_md_tun, t); } static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) { if (t->parms.collect_md) rcu_assign_pointer(ign->collect_md_tun_erspan, t); } static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t) { if (t->parms.collect_md) rcu_assign_pointer(ign->collect_md_tun, NULL); } static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t) { if (t->parms.collect_md) rcu_assign_pointer(ign->collect_md_tun_erspan, NULL); } static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, const struct ip6_tnl *t) { return __ip6gre_bucket(ign, &t->parms); } static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; for (tp = ip6gre_bucket(ign, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { rcu_assign_pointer(*tp, t->next); break; } } } static struct ip6_tnl *ip6gre_tunnel_find(struct net *net, const struct __ip6_tnl_parm *parms, int type) { const struct in6_addr *remote = &parms->raddr; const struct in6_addr *local = &parms->laddr; __be32 key = parms->i_key; int link = parms->link; struct ip6_tnl *t; struct ip6_tnl __rcu **tp; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); for (tp = __ip6gre_bucket(ign, parms); (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && key == t->parms.i_key && link == t->parms.link && type == t->dev->type) break; return t; } static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, const struct __ip6_tnl_parm *parms, int create) { struct ip6_tnl *t, *nt; struct net_device *dev; char name[IFNAMSIZ]; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); if (t && create) return NULL; if (t || !create) return t; if (parms->name[0]) { if (!dev_valid_name(parms->name)) return NULL; strscpy(name, parms->name, IFNAMSIZ); } else { strcpy(name, "ip6gre%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6gre_tunnel_setup); if (!dev) return NULL; dev_net_set(dev, net); nt = netdev_priv(dev); nt->parms = *parms; dev->rtnl_link_ops = &ip6gre_link_ops; nt->dev = dev; nt->net = dev_net(dev); if (register_netdevice(dev) < 0) goto failed_free; ip6gre_tnl_link_config(nt, 1); ip6gre_tunnel_link(ign, nt); return nt; failed_free: free_netdev(dev); return NULL; } static void ip6erspan_tunnel_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6erspan_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); dst_cache_reset(&t->dst_cache); netdev_put(dev, &t->dev_tracker); } static void ip6gre_tunnel_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); if (ign->fb_tunnel_dev == dev) WRITE_ONCE(ign->fb_tunnel_dev, NULL); 
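/* Only per-tunnel state is left to tear down: drop the cached routes and the device reference taken in ip6gre_tunnel_init_common(). */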
dst_cache_reset(&t->dst_cache); netdev_put(dev, &t->dev_tracker); } static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *ipv6h; struct tnl_ptk_info tpi; struct ip6_tnl *t; if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6), offset) < 0) return -EINVAL; ipv6h = (const struct ipv6hdr *)skb->data; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, tpi.key, tpi.proto); if (!t) return -ENOENT; switch (type) { case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); if (code != ICMPV6_PORT_UNREACH) break; return 0; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); break; } return 0; case ICMPV6_PARAMPROB: { struct ipv6_tlv_tnl_enc_lim *tel; __u32 teli; teli = 0; if (code == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); if (teli && teli == be32_to_cpu(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", t->parms.name); } } else { net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } return 0; } case ICMPV6_PKT_TOOBIG: ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); return 0; case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); return 0; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; return 0; } static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { const struct ipv6hdr *ipv6h; struct ip6_tnl *tunnel; ipv6h = ipv6_hdr(skb); tunnel = ip6gre_tunnel_lookup(skb->dev, &ipv6h->saddr, &ipv6h->daddr, tpi->key, tpi->proto); if (tunnel) { if (tunnel->parms.collect_md) { IP_TUNNEL_DECLARE_FLAGS(flags); struct metadata_dst *tun_dst; __be64 tun_id; ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } else { ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } return PACKET_RCVD; } return PACKET_REJECT; } static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int gre_hdr_len) { struct erspan_base_hdr *ershdr; const struct ipv6hdr *ipv6h; struct erspan_md2 *md2; struct ip6_tnl *tunnel; u8 ver; if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) return PACKET_REJECT; ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; tunnel = ip6gre_tunnel_lookup(skb->dev, &ipv6h->saddr, &ipv6h->daddr, tpi->key, tpi->proto); if (tunnel) { int len = erspan_hdr_len(ver); if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), false, false) < 0) return PACKET_REJECT; if (tunnel->parms.collect_md) { struct erspan_metadata *pkt_md, *md; IP_TUNNEL_DECLARE_FLAGS(flags); struct metadata_dst *tun_dst; struct ip_tunnel_info *info; unsigned char *gh; __be64 tun_id; __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags); ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, sizeof(*md)); if (!tun_dst) return PACKET_REJECT; /* skb can be uncloned in __iptunnel_pull_header, so 
* old pkt_md is no longer valid and we need to reset * it */ gh = skb_network_header(skb) + skb_network_header_len(skb); pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + sizeof(*ershdr)); info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); md->version = ver; md2 = &md->u.md2; memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); info->options_len = sizeof(*md); ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } else { ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } return PACKET_RCVD; } return PACKET_REJECT; } static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; int hdr_len; hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) goto drop; if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; goto out; } if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; } static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev, struct flowi6 *fl6, __u8 *dsfield, int *encap_limit) { const struct iphdr *iph = ip_hdr(skb); struct ip6_tnl *t = netdev_priv(dev); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) *encap_limit = t->parms.encap_limit; memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) *dsfield = ipv4_get_dsfield(iph); else *dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6->flowi6_mark = skb->mark; else fl6->flowi6_mark = t->parms.fwmark; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); } static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev, struct flowi6 *fl6, __u8 *dsfield, int *encap_limit) { struct ipv6hdr *ipv6h; struct ip6_tnl *t = netdev_priv(dev); __u16 offset; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ ipv6h = ipv6_hdr(skb); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offset + 2); return -1; } *encap_limit = tel->encap_limit - 1; } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { *encap_limit = t->parms.encap_limit; } memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) *dsfield = ipv6_get_dsfield(ipv6h); else *dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6->flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6->flowi6_mark = skb->mark; else fl6->flowi6_mark = t->parms.fwmark; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); return 0; } static int prepare_ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev, struct flowi6 *fl6, __u8 *dsfield, int *encap_limit) { struct ip6_tnl *t = netdev_priv(dev); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) *encap_limit = t->parms.encap_limit; memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) *dsfield = 0; else 
*dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6->flowi6_mark = skb->mark; else fl6->flowi6_mark = t->parms.fwmark; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); return 0; } static struct ip_tunnel_info *skb_tunnel_info_txcheck(struct sk_buff *skb) { struct ip_tunnel_info *tun_info; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))) return ERR_PTR(-EINVAL); return tun_info; } static netdev_tx_t __gre6_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct flowi6 *fl6, int encap_limit, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); IP_TUNNEL_DECLARE_FLAGS(flags); __be16 protocol; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; if (dev->header_ops && dev->type == ARPHRD_IP6GRE) fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr; else fl6->daddr = tunnel->parms.raddr; /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; if (tunnel->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; int tun_hlen; tun_info = skb_tunnel_info_txcheck(skb); if (IS_ERR(tun_info) || unlikely(ip_tunnel_info_af(tun_info) != AF_INET6)) return -EINVAL; key = &tun_info->key; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_GRE; fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; ip_tunnel_flags_zero(flags); __set_bit(IP_TUNNEL_CSUM_BIT, flags); __set_bit(IP_TUNNEL_KEY_BIT, flags); __set_bit(IP_TUNNEL_SEQ_BIT, flags); ip_tunnel_flags_and(flags, flags, key->tun_flags); tun_hlen = gre_calc_hlen(flags); if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen)) return -ENOMEM; gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), test_bit(IP_TUNNEL_SEQ_BIT, flags) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); } else { if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) return -ENOMEM; ip_tunnel_flags_copy(flags, tunnel->parms.o_flags); gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, test_bit(IP_TUNNEL_SEQ_BIT, flags) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); } static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int encap_limit = -1; struct flowi6 fl6; __u8 dsfield = 0; __u32 mtu; int err; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); if (!t->parms.collect_md) prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, &dsfield, &encap_limit); err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, t->parms.o_flags)); if (err) return -1; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol); if (err != 0) { /* XXX: send ICMP error even if DF is not set. 
*/ if (err == -EMSGSIZE) icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -1; } return 0; } static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct ipv6hdr *ipv6h = ipv6_hdr(skb); int encap_limit = -1; struct flowi6 fl6; __u8 dsfield = 0; __u32 mtu; int err; if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) return -1; if (!t->parms.collect_md && prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) return -1; if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, t->parms.o_flags))) return -1; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol); if (err != 0) { if (err == -EMSGSIZE) icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -1; } return 0; } static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int encap_limit = -1; struct flowi6 fl6; __u8 dsfield = 0; __u32 mtu; int err; if (!t->parms.collect_md && prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit)) return -1; err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, t->parms.o_flags)); if (err) return err; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol); return err; } static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); __be16 payload_protocol; int ret; if (!pskb_inet_may_pull(skb)) goto tx_err; if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; payload_protocol = skb_protocol(skb, true); switch (payload_protocol) { case htons(ETH_P_IP): ret = ip6gre_xmit_ipv4(skb, dev); break; case htons(ETH_P_IPV6): ret = ip6gre_xmit_ipv6(skb, dev); break; default: ret = ip6gre_xmit_other(skb, dev); break; } if (ret < 0) goto tx_err; return NETDEV_TX_OK; tx_err: if (!t->parms.collect_md || !IS_ERR(skb_tunnel_info_txcheck(skb))) DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel_info *tun_info = NULL; struct ip6_tnl *t = netdev_priv(dev); struct dst_entry *dst = skb_dst(skb); IP_TUNNEL_DECLARE_FLAGS(flags) = { }; bool truncate = false; int encap_limit = -1; __u8 dsfield = false; struct flowi6 fl6; int err = -EINVAL; __be16 proto; __u32 mtu; int nhoff; if (!pskb_inet_may_pull(skb)) goto tx_err; if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; if (gre_handle_offloads(skb, false)) goto tx_err; if (skb->len > dev->mtu + dev->hard_header_len) { if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) goto tx_err; truncate = true; } nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; if (skb->protocol == htons(ETH_P_IPV6)) { int thoff; if (skb_transport_header_was_set(skb)) thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) truncate = true; } if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; __clear_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags); IPCB(skb)->flags = 0; /* For collect_md mode, derive fl6 from the tunnel key, * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}. 
*/ if (t->parms.collect_md) { const struct ip_tunnel_key *key; struct erspan_metadata *md; __be32 tun_id; tun_info = skb_tunnel_info_txcheck(skb); if (IS_ERR(tun_info) || unlikely(ip_tunnel_info_af(tun_info) != AF_INET6)) goto tx_err; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_GRE; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) goto tx_err; if (tun_info->options_len < sizeof(*md)) goto tx_err; md = ip_tunnel_info_opts(tun_info); tun_id = tunnel_id_to_key32(key->tun_id); if (md->version == 1) { erspan_build_header(skb, ntohl(tun_id), ntohl(md->u.index), truncate, false); proto = htons(ETH_P_ERSPAN); } else if (md->version == 2) { erspan_build_header_v2(skb, ntohl(tun_id), md->u.md2.dir, get_hwid(&md->u.md2), truncate, false); proto = htons(ETH_P_ERSPAN2); } else { goto tx_err; } } else { switch (skb->protocol) { case htons(ETH_P_IP): memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, &dsfield, &encap_limit); break; case htons(ETH_P_IPV6): if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr)) goto tx_err; if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) goto tx_err; break; default: memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); break; } if (t->parms.erspan_ver == 1) { erspan_build_header(skb, ntohl(t->parms.o_key), t->parms.index, truncate, false); proto = htons(ETH_P_ERSPAN); } else if (t->parms.erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(t->parms.o_key), t->parms.dir, t->parms.hwid, truncate, false); proto = htons(ETH_P_ERSPAN2); } else { goto tx_err; } fl6.daddr = t->parms.raddr; } /* Push GRE header. */ __set_bit(IP_TUNNEL_SEQ_BIT, flags); gre_build_header(skb, 8, flags, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, NEXTHDR_GRE); if (err != 0) { /* XXX: send ICMP error even if DF is not set. 
*/ if (err == -EMSGSIZE) { if (skb->protocol == htons(ETH_P_IP)) icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); else icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } goto tx_err; } return NETDEV_TX_OK; tx_err: if (!IS_ERR(tun_info)) DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; if (dev->type != ARPHRD_ETHER) { __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); } /* Set up flowi template */ fl6->saddr = p->laddr; fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; } static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, int t_hlen) { const struct __ip6_tnl_parm *p = &t->parms; struct net_device *dev = t->dev; if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, NULL, strict); if (!rt) return; if (rt->dst.dev) { unsigned short dst_len = rt->dst.dev->hard_header_len + t_hlen; if (t->dev->header_ops) dev->hard_header_len = dst_len; else dev->needed_headroom = dst_len; if (set_mtu) { int mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) mtu -= 8; if (dev->type == ARPHRD_ETHER) mtu -= ETH_HLEN; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; WRITE_ONCE(dev->mtu, mtu); } } ip6_rt_put(rt); } } static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) { int t_hlen; tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); if (tunnel->dev->header_ops) tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; else tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; return t_hlen; } static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) { ip6gre_tnl_link_config_common(t); ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t)); } static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; t->parms.flags = p->flags; t->parms.hop_limit = p->hop_limit; t->parms.encap_limit = p->encap_limit; t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags); ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags); t->parms.fwmark = p->fwmark; t->parms.erspan_ver = p->erspan_ver; t->parms.index = p->index; t->parms.dir = p->dir; t->parms.hwid = p->hwid; dst_cache_reset(&t->dst_cache); } static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, int set_mtu) { ip6gre_tnl_copy_tnl_parm(t, p); ip6gre_tnl_link_config(t, 
set_mtu); return 0; } static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u) { p->laddr = u->laddr; p->raddr = u->raddr; p->flags = u->flags; p->hop_limit = u->hop_limit; p->encap_limit = u->encap_limit; p->flowinfo = u->flowinfo; p->link = u->link; p->i_key = u->i_key; p->o_key = u->o_key; gre_flags_to_tnl_flags(p->i_flags, u->i_flags); gre_flags_to_tnl_flags(p->o_flags, u->o_flags); memcpy(p->name, u->name, sizeof(u->name)); } static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) { u->proto = IPPROTO_GRE; u->laddr = p->laddr; u->raddr = p->raddr; u->flags = p->flags; u->hop_limit = p->hop_limit; u->encap_limit = p->encap_limit; u->flowinfo = p->flowinfo; u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); memcpy(u->name, p->name, sizeof(u->name)); } static int ip6gre_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm2 p; struct __ip6_tnl_parm p1; struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); memset(&p1, 0, sizeof(p1)); switch (cmd) { case SIOCGETTUNNEL: if (dev == ign->fb_tunnel_dev) { if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); if (!t) t = netdev_priv(dev); } memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; err = -EFAULT; if (copy_from_user(&p, data, sizeof(p))) goto done; err = -EINVAL; if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)) goto done; if (!(p.i_flags&GRE_KEY)) p.i_key = 0; if (!(p.o_flags&GRE_KEY)) p.o_key = 0; ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { if (t) { if (t->dev != dev) { err = -EEXIST; break; } } else { t = netdev_priv(dev); ip6gre_tunnel_unlink(ign, t); synchronize_net(); ip6gre_tnl_change(t, &p1, 1); ip6gre_tunnel_link(ign, t); netdev_state_change(dev); } } if (t) { err = 0; memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); break; case SIOCDELTUNNEL: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; if (dev == ign->fb_tunnel_dev) { err = -EFAULT; if (copy_from_user(&p, data, sizeof(p))) goto done; err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); if (!t) goto done; err = -EPERM; if (t == netdev_priv(ign->fb_tunnel_dev)) goto done; dev = t->dev; } unregister_netdevice(dev); err = 0; break; default: err = -EINVAL; } done: return err; } static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip6_tnl *t = netdev_priv(dev); struct ipv6hdr *ipv6h; __be16 *p; ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h)); ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, t->fl.u.ip6.flowlabel, true, &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; ipv6h->daddr = t->parms.raddr; p = (__be16 *)(ipv6h + 1); p[0] = ip_tunnel_flags_to_be16(t->parms.o_flags); p[1] = htons(type); /* * Set the source hardware address. */ if (saddr) memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr)); if (daddr) memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr)); if (!ipv6_addr_any(&ipv6h->daddr)) return t->hlen; return -t->hlen; } static const struct header_ops ip6gre_header_ops = { .create = ip6gre_header, }; static const struct net_device_ops ip6gre_netdev_ops = { .ndo_init = ip6gre_tunnel_init, .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_iflink = ip6_tnl_get_iflink, }; static void ip6gre_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); } static void ip6gre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ip6gre_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->type = ARPHRD_IP6GRE; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); } #define GRE6_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ NETIF_F_HW_CSUM) static void ip6gre_tnl_init_features(struct net_device *dev) { struct ip6_tnl *nt = netdev_priv(dev); dev->features |= GRE6_FEATURES; dev->hw_features |= GRE6_FEATURES; /* TCP offload with GRE SEQ is not supported, nor can we support 2 * levels of outer headers requiring an update. 
*/ if (test_bit(IP_TUNNEL_SEQ_BIT, nt->parms.o_flags)) return; if (test_bit(IP_TUNNEL_CSUM_BIT, nt->parms.o_flags) && nt->encap.type != TUNNEL_ENCAP_NONE) return; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->lltx = true; } static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; int ret; int t_hlen; tunnel = netdev_priv(dev); tunnel->dev = dev; tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) return ret; ret = gro_cells_init(&tunnel->gro_cells, dev); if (ret) goto cleanup_dst_cache_init; t_hlen = ip6gre_calc_hlen(tunnel); dev->mtu = ETH_DATA_LEN - t_hlen; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; if (tunnel->parms.collect_md) { netif_keep_dst(dev); } ip6gre_tnl_init_features(dev); netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; cleanup_dst_cache_init: dst_cache_destroy(&tunnel->dst_cache); return ret; } static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; int ret; ret = ip6gre_tunnel_init_common(dev); if (ret) return ret; tunnel = netdev_priv(dev); if (tunnel->parms.collect_md) return 0; __dev_addr_set(dev, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; return 0; } static void ip6gre_fb_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); tunnel->dev = dev; tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); tunnel->hlen = sizeof(struct ipv6hdr) + 4; } static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) { struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct net_device *dev, *aux; int prio; for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6gre_link_ops || dev->rtnl_link_ops == &ip6gre_tap_ops || dev->rtnl_link_ops == &ip6erspan_tap_ops) unregister_netdevice_queue(dev, head); for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { struct ip6_tnl *t; t = rtnl_dereference(ign->tunnels[prio][h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, head); t = rtnl_dereference(t->next); } } } } static int __net_init ip6gre_init_net(struct net *net) { struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct net_device *ndev; int err; if (!net_has_fallback_tunnels(net)) return 0; ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", NET_NAME_UNKNOWN, ip6gre_tunnel_setup); if (!ndev) { err = -ENOMEM; goto err_alloc_dev; } ign->fb_tunnel_dev = ndev; dev_net_set(ign->fb_tunnel_dev, net); /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. 
*/ ign->fb_tunnel_dev->netns_local = true; ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; err = register_netdev(ign->fb_tunnel_dev); if (err) goto err_reg_dev; rcu_assign_pointer(ign->tunnels_wc[0], netdev_priv(ign->fb_tunnel_dev)); return 0; err_reg_dev: free_netdev(ndev); err_alloc_dev: return err; } static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_to_kill) { struct net *net; ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) ip6gre_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, .exit_batch_rtnl = ip6gre_exit_batch_rtnl, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { __be16 flags; if (!data) return 0; flags = 0; if (data[IFLA_GRE_IFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); if (data[IFLA_GRE_OFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); if (flags & (GRE_VERSION|GRE_ROUTING)) return -EINVAL; return 0; } static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct in6_addr daddr; if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } if (!data) goto out; if (data[IFLA_GRE_REMOTE]) { daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (ipv6_addr_any(&daddr)) return -EINVAL; } out: return ip6gre_tunnel_validate(tb, data, extack); } static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { __be16 flags = 0; int ret, ver = 0; if (!data) return 0; ret = ip6gre_tap_validate(tb, data, extack); if (ret) return ret; /* ERSPAN should only have GRE sequence and key flag */ if (data[IFLA_GRE_OFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); if (data[IFLA_GRE_IFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); if (!data[IFLA_GRE_COLLECT_METADATA] && flags != (GRE_SEQ | GRE_KEY)) return -EINVAL; /* ERSPAN Session ID only has 10-bit. Since we reuse * 32-bit key field as ID, check it's range. 
*/ if (data[IFLA_GRE_IKEY] && (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) return -EINVAL; if (data[IFLA_GRE_OKEY] && (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) return -EINVAL; if (data[IFLA_GRE_ERSPAN_VER]) { ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); if (ver != 1 && ver != 2) return -EINVAL; } if (ver == 1) { if (data[IFLA_GRE_ERSPAN_INDEX]) { u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); if (index & ~INDEX_MASK) return -EINVAL; } } else if (ver == 2) { if (data[IFLA_GRE_ERSPAN_DIR]) { u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); if (dir & ~(DIR_MASK >> DIR_OFFSET)) return -EINVAL; } if (data[IFLA_GRE_ERSPAN_HWID]) { u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); if (hwid & ~(HWID_MASK >> HWID_OFFSET)) return -EINVAL; } } return 0; } static void ip6erspan_set_version(struct nlattr *data[], struct __ip6_tnl_parm *parms) { if (!data) return; parms->erspan_ver = 1; if (data[IFLA_GRE_ERSPAN_VER]) parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); if (parms->erspan_ver == 1) { if (data[IFLA_GRE_ERSPAN_INDEX]) parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); } else if (parms->erspan_ver == 2) { if (data[IFLA_GRE_ERSPAN_DIR]) parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); if (data[IFLA_GRE_ERSPAN_HWID]) parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); } } static void ip6gre_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { memset(parms, 0, sizeof(*parms)); if (!data) return; if (data[IFLA_GRE_LINK]) parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) gre_flags_to_tnl_flags(parms->i_flags, nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) gre_flags_to_tnl_flags(parms->o_flags, nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); if (data[IFLA_GRE_OKEY]) parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); if (data[IFLA_GRE_ENCAP_LIMIT]) parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); if (data[IFLA_GRE_FLOWINFO]) parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]); if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); if (data[IFLA_GRE_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); if (data[IFLA_GRE_COLLECT_METADATA]) parms->collect_md = true; } static int ip6gre_tap_init(struct net_device *dev) { int ret; ret = ip6gre_tunnel_init_common(dev); if (ret) return ret; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; return 0; } static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_init = ip6gre_tap_init, .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_iflink = ip6_tnl_get_iflink, }; static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel) { int t_hlen; tunnel->tun_hlen = 8; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + erspan_hdr_len(tunnel->parms.erspan_ver); t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; return t_hlen; } static int ip6erspan_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; int t_hlen; int ret; tunnel = netdev_priv(dev); tunnel->dev = dev; tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, 
dev->name); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) return ret; ret = gro_cells_init(&tunnel->gro_cells, dev); if (ret) goto cleanup_dst_cache_init; t_hlen = ip6erspan_calc_hlen(tunnel); dev->mtu = ETH_DATA_LEN - t_hlen; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1); netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; cleanup_dst_cache_init: dst_cache_destroy(&tunnel->dst_cache); return ret; } static const struct net_device_ops ip6erspan_netdev_ops = { .ndo_init = ip6erspan_tap_init, .ndo_uninit = ip6erspan_tunnel_uninit, .ndo_start_xmit = ip6erspan_tunnel_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_iflink = ip6_tnl_get_iflink, }; static void ip6gre_tap_setup(struct net_device *dev) { ether_setup(dev); dev->max_mtu = 0; dev->netdev_ops = &ip6gre_tap_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *ipencap) { bool ret = false; memset(ipencap, 0, sizeof(*ipencap)); if (!data) return ret; if (data[IFLA_GRE_ENCAP_TYPE]) { ret = true; ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); } if (data[IFLA_GRE_ENCAP_FLAGS]) { ret = true; ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); } if (data[IFLA_GRE_ENCAP_SPORT]) { ret = true; ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); } if (data[IFLA_GRE_ENCAP_DPORT]) { ret = true; ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); } return ret; } static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6_tnl *nt; struct ip_tunnel_encap ipencap; int err; nt = netdev_priv(dev); if (ip6gre_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) return err; } if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); nt->dev = dev; nt->net = dev_net(dev); err = register_netdevice(dev); if (err) goto out; if (tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); out: return err; } static int ip6gre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6_tnl *nt = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6gre_net *ign; int err; ip6gre_netlink_parms(data, &nt->parms); ign = net_generic(net, ip6gre_net_id); if (nt->parms.collect_md) { if (rtnl_dereference(ign->collect_md_tun)) return -EEXIST; } else { if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) return -EEXIST; } err = ip6gre_newlink_common(src_net, dev, tb, data, extack); if (!err) { ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); ip6gre_tunnel_link_md(ign, nt); ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); } return err; } static struct ip6_tnl * ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct __ip6_tnl_parm *p_p, struct netlink_ext_ack *extack) { struct ip6_tnl *t, *nt = netdev_priv(dev); struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, 
ip6gre_net_id); struct ip_tunnel_encap ipencap; if (dev == ign->fb_tunnel_dev) return ERR_PTR(-EINVAL); if (ip6gre_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) return ERR_PTR(err); } ip6gre_netlink_parms(data, p_p); t = ip6gre_tunnel_locate(net, p_p, 0); if (t) { if (t->dev != dev) return ERR_PTR(-EEXIST); } else { t = nt; } return t; } static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6_tnl *t = netdev_priv(dev); struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); struct __ip6_tnl_parm p; t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t); ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); ip6gre_tunnel_link_md(ign, t); ip6gre_tunnel_link(ign, t); return 0; } static void ip6gre_dellink(struct net_device *dev, struct list_head *head) { struct net *net = dev_net(dev); struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); if (dev != ign->fb_tunnel_dev) unregister_netdevice_queue(dev, head); } static size_t ip6gre_get_size(const struct net_device *dev) { return /* IFLA_GRE_LINK */ nla_total_size(4) + /* IFLA_GRE_IFLAGS */ nla_total_size(2) + /* IFLA_GRE_OFLAGS */ nla_total_size(2) + /* IFLA_GRE_IKEY */ nla_total_size(4) + /* IFLA_GRE_OKEY */ nla_total_size(4) + /* IFLA_GRE_LOCAL */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GRE_REMOTE */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GRE_TTL */ nla_total_size(1) + /* IFLA_GRE_ENCAP_LIMIT */ nla_total_size(1) + /* IFLA_GRE_FLOWINFO */ nla_total_size(4) + /* IFLA_GRE_FLAGS */ nla_total_size(4) + /* IFLA_GRE_ENCAP_TYPE */ nla_total_size(2) + /* IFLA_GRE_ENCAP_FLAGS */ nla_total_size(2) + /* IFLA_GRE_ENCAP_SPORT */ nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + /* IFLA_GRE_COLLECT_METADATA */ nla_total_size(0) + /* IFLA_GRE_FWMARK */ nla_total_size(4) + /* IFLA_GRE_ERSPAN_INDEX */ nla_total_size(4) + 0; } static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; IP_TUNNEL_DECLARE_FLAGS(o_flags); ip_tunnel_flags_copy(o_flags, p->o_flags); if (p->erspan_ver == 1 || p->erspan_ver == 2) { if (!p->collect_md) __set_bit(IP_TUNNEL_KEY_BIT, o_flags); if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) goto nla_put_failure; if (p->erspan_ver == 1) { if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) goto nla_put_failure; } else { if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) goto nla_put_failure; } } if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, t->encap.type) || nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, t->encap.sport) || nla_put_be16(skb, 
IFLA_GRE_ENCAP_DPORT, t->encap.dport) || nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, t->encap.flags)) goto nla_put_failure; if (p->collect_md) { if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) goto nla_put_failure; } return 0; nla_put_failure: return -EMSGSIZE; } static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_LINK] = { .type = NLA_U32 }, [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_IKEY] = { .type = NLA_U32 }, [IFLA_GRE_OKEY] = { .type = NLA_U32 }, [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) }, [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) }, [IFLA_GRE_TTL] = { .type = NLA_U8 }, [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, }; static void ip6erspan_tap_setup(struct net_device *dev) { ether_setup(dev); dev->max_mtu = 0; dev->netdev_ops = &ip6erspan_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); } static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6_tnl *nt = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6gre_net *ign; int err; ip6gre_netlink_parms(data, &nt->parms); ip6erspan_set_version(data, &nt->parms); ign = net_generic(net, ip6gre_net_id); if (nt->parms.collect_md) { if (rtnl_dereference(ign->collect_md_tun_erspan)) return -EEXIST; } else { if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) return -EEXIST; } err = ip6gre_newlink_common(src_net, dev, tb, data, extack); if (!err) { ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]); ip6erspan_tunnel_link_md(ign, nt); ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); } return err; } static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu) { ip6gre_tnl_link_config_common(t); ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t)); } static int ip6erspan_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, int set_mtu) { ip6gre_tnl_copy_tnl_parm(t, p); ip6erspan_tnl_link_config(t, set_mtu); return 0; } static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); struct __ip6_tnl_parm p; struct ip6_tnl *t; t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t); ip6erspan_set_version(data, &p); ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); ip6erspan_tunnel_link_md(ign, t); ip6gre_tunnel_link(ign, t); return 0; } static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .kind = "ip6gre", .maxtype = IFLA_GRE_MAX, .policy = ip6gre_policy, .priv_size = sizeof(struct ip6_tnl), .setup = ip6gre_tunnel_setup, .validate = 
ip6gre_tunnel_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .kind = "ip6gretap", .maxtype = IFLA_GRE_MAX, .policy = ip6gre_policy, .priv_size = sizeof(struct ip6_tnl), .setup = ip6gre_tap_setup, .validate = ip6gre_tap_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = { .kind = "ip6erspan", .maxtype = IFLA_GRE_MAX, .policy = ip6gre_policy, .priv_size = sizeof(struct ip6_tnl), .setup = ip6erspan_tap_setup, .validate = ip6erspan_tap_validate, .newlink = ip6erspan_newlink, .changelink = ip6erspan_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, .get_link_net = ip6_tnl_get_link_net, }; /* * And now the modules code and kernel interface. */ static int __init ip6gre_init(void) { int err; pr_info("GRE over IPv6 tunneling driver\n"); err = register_pernet_device(&ip6gre_net_ops); if (err < 0) return err; err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; } err = rtnl_link_register(&ip6gre_link_ops); if (err < 0) goto rtnl_link_failed; err = rtnl_link_register(&ip6gre_tap_ops); if (err < 0) goto tap_ops_failed; err = rtnl_link_register(&ip6erspan_tap_ops); if (err < 0) goto erspan_link_failed; out: return err; erspan_link_failed: rtnl_link_unregister(&ip6gre_tap_ops); tap_ops_failed: rtnl_link_unregister(&ip6gre_link_ops); rtnl_link_failed: inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); add_proto_failed: unregister_pernet_device(&ip6gre_net_ops); goto out; } static void __exit ip6gre_fini(void) { rtnl_link_unregister(&ip6gre_tap_ops); rtnl_link_unregister(&ip6gre_link_ops); rtnl_link_unregister(&ip6erspan_tap_ops); inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); unregister_pernet_device(&ip6gre_net_ops); } module_init(ip6gre_init); module_exit(ip6gre_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>"); MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); MODULE_ALIAS_RTNL_LINK("ip6gre"); MODULE_ALIAS_RTNL_LINK("ip6gretap"); MODULE_ALIAS_RTNL_LINK("ip6erspan"); MODULE_ALIAS_NETDEV("ip6gre0"); |
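/*
 * Example usage (illustrative only): devices registered through the
 * rtnl_link_ops above are normally created from userspace with iproute2.
 * The names and addresses below are placeholders, and exact option
 * spellings may vary with the iproute2 version.
 *
 *   # plain GRE over IPv6 (handled by ip6gre_link_ops, IFLA_GRE_* attrs)
 *   ip link add name ip6gre1 type ip6gre \
 *           local 2001:db8::1 remote 2001:db8::2 ttl 64
 *
 *   # L2 gretap variant (ip6gre_tap_ops)
 *   ip link add name ip6gretap1 type ip6gretap \
 *           local 2001:db8::1 remote 2001:db8::2
 *
 *   # ERSPAN v1: ip6erspan_tap_validate() requires the seq and key flags,
 *   # and the session ID (key) must fit in 10 bits
 *   ip link add name ip6erspan1 type ip6erspan \
 *           local 2001:db8::1 remote 2001:db8::2 seq key 10 erspan 123
 */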
// SPDX-License-Identifier: GPL-2.0 #include <linux/pagewalk.h> #include <linux/mm_inline.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> #include <linux/mount.h> #include <linux/ksm.h> #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> #include <linux/rmap.h> #include <linux/swap.h> #include <linux/sched/mm.h> #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> #include <linux/shmem_fs.h> #include <linux/uaccess.h> #include <linux/pkeys.h> #include <linux/minmax.h> #include <linux/overflow.h> #include <linux/buildid.h> #include <asm/elf.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include "internal.h" #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8) void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; anon = get_mm_counter(mm, MM_ANONPAGES); file = get_mm_counter(mm, MM_FILEPAGES); shmem = get_mm_counter(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and * hiwater_rss only when about to *lower* total_vm or rss. Any * collector of these hiwater stats must therefore get total_vm * and rss too, which will usually be the higher. Barriers? not * worth the effort, such snapshots can always be inconsistent.
*/ hiwater_vm = total_vm = mm->total_vm; if (hiwater_vm < mm->hiwater_vm) hiwater_vm = mm->hiwater_vm; hiwater_rss = total_rss = anon + file + shmem; if (hiwater_rss < mm->hiwater_rss) hiwater_rss = mm->hiwater_rss; /* split executable areas between text and lib */ text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK); text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; swap = get_mm_counter(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm)); SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss); SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss); SEQ_PUT_DEC(" kB\nRssAnon:\t", anon); SEQ_PUT_DEC(" kB\nRssFile:\t", file); SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem); SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm); SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm); seq_put_decimal_ull_width(m, " kB\nVmExe:\t", text >> 10, 8); seq_put_decimal_ull_width(m, " kB\nVmLib:\t", lib >> 10, 8); seq_put_decimal_ull_width(m, " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); seq_puts(m, " kB\n"); hugetlb_report_usage(m, mm); } #undef SEQ_PUT_DEC unsigned long task_vsize(struct mm_struct *mm) { return PAGE_SIZE * mm->total_vm; } unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { *shared = get_mm_counter(mm, MM_FILEPAGES) + get_mm_counter(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); return mm->total_vm; } #ifdef CONFIG_NUMA /* * Save get_task_policy() for show_numa_map(). */ static void hold_task_mempolicy(struct proc_maps_private *priv) { struct task_struct *task = priv->task; task_lock(task); priv->task_mempolicy = get_task_policy(task); mpol_get(priv->task_mempolicy); task_unlock(task); } static void release_task_mempolicy(struct proc_maps_private *priv) { mpol_put(priv->task_mempolicy); } #else static void hold_task_mempolicy(struct proc_maps_private *priv) { } static void release_task_mempolicy(struct proc_maps_private *priv) { } #endif static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, loff_t *ppos) { struct vm_area_struct *vma = vma_next(&priv->iter); if (vma) { *ppos = vma->vm_start; } else { *ppos = -2UL; vma = get_gate_vma(priv->mm); } return vma; } static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; unsigned long last_addr = *ppos; struct mm_struct *mm; /* See m_next(). Zero at the start or after lseek. 
*/ if (last_addr == -1UL) return NULL; priv->task = get_proc_task(priv->inode); if (!priv->task) return ERR_PTR(-ESRCH); mm = priv->mm; if (!mm || !mmget_not_zero(mm)) { put_task_struct(priv->task); priv->task = NULL; return NULL; } if (mmap_read_lock_killable(mm)) { mmput(mm); put_task_struct(priv->task); priv->task = NULL; return ERR_PTR(-EINTR); } vma_iter_init(&priv->iter, mm, last_addr); hold_task_mempolicy(priv); if (last_addr == -2UL) return get_gate_vma(mm); return proc_get_vma(priv, ppos); } static void *m_next(struct seq_file *m, void *v, loff_t *ppos) { if (*ppos == -2UL) { *ppos = -1UL; return NULL; } return proc_get_vma(m->private, ppos); } static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct mm_struct *mm = priv->mm; if (!priv->task) return; release_task_mempolicy(priv); mmap_read_unlock(mm); mmput(mm); put_task_struct(priv->task); priv->task = NULL; } static int proc_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops, int psize) { struct proc_maps_private *priv = __seq_open_private(file, ops, psize); if (!priv) return -ENOMEM; priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); if (IS_ERR(priv->mm)) { int err = PTR_ERR(priv->mm); seq_release_private(inode, file); return err; } return 0; } static int proc_map_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct proc_maps_private *priv = seq->private; if (priv->mm) mmdrop(priv->mm); return seq_release_private(inode, file); } static int do_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { return proc_maps_open(inode, file, ops, sizeof(struct proc_maps_private)); } static void get_vma_name(struct vm_area_struct *vma, const struct path **path, const char **name, const char **name_fmt) { struct anon_vma_name *anon_name = vma->vm_mm ? anon_vma_name(vma) : NULL; *name = NULL; *path = NULL; *name_fmt = NULL; /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (vma->vm_file) { /* * If user named this anon shared memory via * prctl(PR_SET_VMA ..., use the provided name. */ if (anon_name) { *name_fmt = "[anon_shmem:%s]"; *name = anon_name->name; } else { *path = file_user_path(vma->vm_file); } return; } if (vma->vm_ops && vma->vm_ops->name) { *name = vma->vm_ops->name(vma); if (*name) return; } *name = arch_vma_name(vma); if (*name) return; if (!vma->vm_mm) { *name = "[vdso]"; return; } if (vma_is_initial_heap(vma)) { *name = "[heap]"; return; } if (vma_is_initial_stack(vma)) { *name = "[stack]"; return; } if (anon_name) { *name_fmt = "[anon:%s]"; *name = anon_name->name; return; } } static void show_vma_header_prefix(struct seq_file *m, unsigned long start, unsigned long end, vm_flags_t flags, unsigned long long pgoff, dev_t dev, unsigned long ino) { seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_put_hex_ll(m, NULL, start, 8); seq_put_hex_ll(m, "-", end, 8); seq_putc(m, ' '); seq_putc(m, flags & VM_READ ? 'r' : '-'); seq_putc(m, flags & VM_WRITE ? 'w' : '-'); seq_putc(m, flags & VM_EXEC ? 'x' : '-'); seq_putc(m, flags & VM_MAYSHARE ? 
's' : 'p'); seq_put_hex_ll(m, " ", pgoff, 8); seq_put_hex_ll(m, " ", MAJOR(dev), 2); seq_put_hex_ll(m, ":", MINOR(dev), 2); seq_put_decimal_ull(m, " ", ino); seq_putc(m, ' '); } static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { const struct path *path; const char *name_fmt, *name; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; if (vma->vm_file) { const struct inode *inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; } start = vma->vm_start; end = vma->vm_end; show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); get_vma_name(vma, &path, &name, &name_fmt); if (path) { seq_pad(m, ' '); seq_path(m, path, "\n"); } else if (name_fmt) { seq_pad(m, ' '); seq_printf(m, name_fmt, name); } else if (name) { seq_pad(m, ' '); seq_puts(m, name); } seq_putc(m, '\n'); } static int show_map(struct seq_file *m, void *v) { show_map_vma(m, v); return 0; } static const struct seq_operations proc_pid_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_map }; static int pid_maps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_pid_maps_op); } #define PROCMAP_QUERY_VMA_FLAGS ( \ PROCMAP_QUERY_VMA_READABLE | \ PROCMAP_QUERY_VMA_WRITABLE | \ PROCMAP_QUERY_VMA_EXECUTABLE | \ PROCMAP_QUERY_VMA_SHARED \ ) #define PROCMAP_QUERY_VALID_FLAGS_MASK ( \ PROCMAP_QUERY_COVERING_OR_NEXT_VMA | \ PROCMAP_QUERY_FILE_BACKED_VMA | \ PROCMAP_QUERY_VMA_FLAGS \ ) static int query_vma_setup(struct mm_struct *mm) { return mmap_read_lock_killable(mm); } static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma) { mmap_read_unlock(mm); } static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr) { return find_vma(mm, addr); } static struct vm_area_struct *query_matching_vma(struct mm_struct *mm, unsigned long addr, u32 flags) { struct vm_area_struct *vma; next_vma: vma = query_vma_find_by_addr(mm, addr); if (!vma) goto no_vma; /* user requested only file-backed VMA, keep iterating */ if ((flags & PROCMAP_QUERY_FILE_BACKED_VMA) && !vma->vm_file) goto skip_vma; /* VMA permissions should satisfy query flags */ if (flags & PROCMAP_QUERY_VMA_FLAGS) { u32 perm = 0; if (flags & PROCMAP_QUERY_VMA_READABLE) perm |= VM_READ; if (flags & PROCMAP_QUERY_VMA_WRITABLE) perm |= VM_WRITE; if (flags & PROCMAP_QUERY_VMA_EXECUTABLE) perm |= VM_EXEC; if (flags & PROCMAP_QUERY_VMA_SHARED) perm |= VM_MAYSHARE; if ((vma->vm_flags & perm) != perm) goto skip_vma; } /* found covering VMA or user is OK with the matching next VMA */ if ((flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) || vma->vm_start <= addr) return vma; skip_vma: /* * If the user needs closest matching VMA, keep iterating. 
*/ addr = vma->vm_end; if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) goto next_vma; no_vma: return ERR_PTR(-ENOENT); } static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg) { struct procmap_query karg; struct vm_area_struct *vma; struct mm_struct *mm; const char *name = NULL; char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL; __u64 usize; int err; if (copy_from_user(&usize, (void __user *)uarg, sizeof(usize))) return -EFAULT; /* argument struct can never be that large, reject abuse */ if (usize > PAGE_SIZE) return -E2BIG; /* argument struct should have at least query_flags and query_addr fields */ if (usize < offsetofend(struct procmap_query, query_addr)) return -EINVAL; err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); if (err) return err; /* reject unknown flags */ if (karg.query_flags & ~PROCMAP_QUERY_VALID_FLAGS_MASK) return -EINVAL; /* either both buffer address and size are set, or both should be zero */ if (!!karg.vma_name_size != !!karg.vma_name_addr) return -EINVAL; if (!!karg.build_id_size != !!karg.build_id_addr) return -EINVAL; mm = priv->mm; if (!mm || !mmget_not_zero(mm)) return -ESRCH; err = query_vma_setup(mm); if (err) { mmput(mm); return err; } vma = query_matching_vma(mm, karg.query_addr, karg.query_flags); if (IS_ERR(vma)) { err = PTR_ERR(vma); vma = NULL; goto out; } karg.vma_start = vma->vm_start; karg.vma_end = vma->vm_end; karg.vma_flags = 0; if (vma->vm_flags & VM_READ) karg.vma_flags |= PROCMAP_QUERY_VMA_READABLE; if (vma->vm_flags & VM_WRITE) karg.vma_flags |= PROCMAP_QUERY_VMA_WRITABLE; if (vma->vm_flags & VM_EXEC) karg.vma_flags |= PROCMAP_QUERY_VMA_EXECUTABLE; if (vma->vm_flags & VM_MAYSHARE) karg.vma_flags |= PROCMAP_QUERY_VMA_SHARED; karg.vma_page_size = vma_kernel_pagesize(vma); if (vma->vm_file) { const struct inode *inode = file_user_inode(vma->vm_file); karg.vma_offset = ((__u64)vma->vm_pgoff) << PAGE_SHIFT; karg.dev_major = MAJOR(inode->i_sb->s_dev); karg.dev_minor = MINOR(inode->i_sb->s_dev); karg.inode = inode->i_ino; } else { karg.vma_offset = 0; karg.dev_major = 0; karg.dev_minor = 0; karg.inode = 0; } if (karg.build_id_size) { __u32 build_id_sz; err = build_id_parse(vma, build_id_buf, &build_id_sz); if (err) { karg.build_id_size = 0; } else { if (karg.build_id_size < build_id_sz) { err = -ENAMETOOLONG; goto out; } karg.build_id_size = build_id_sz; } } if (karg.vma_name_size) { size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size); const struct path *path; const char *name_fmt; size_t name_sz = 0; get_vma_name(vma, &path, &name, &name_fmt); if (path || name_fmt || name) { name_buf = kmalloc(name_buf_sz, GFP_KERNEL); if (!name_buf) { err = -ENOMEM; goto out; } } if (path) { name = d_path(path, name_buf, name_buf_sz); if (IS_ERR(name)) { err = PTR_ERR(name); goto out; } name_sz = name_buf + name_buf_sz - name; } else if (name || name_fmt) { name_sz = 1 + snprintf(name_buf, name_buf_sz, name_fmt ?: "%s", name); name = name_buf; } if (name_sz > name_buf_sz) { err = -ENAMETOOLONG; goto out; } karg.vma_name_size = name_sz; } /* unlock vma or mmap_lock, and put mm_struct before copying data to user */ query_vma_teardown(mm, vma); mmput(mm); if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr), name, karg.vma_name_size)) { kfree(name_buf); return -EFAULT; } kfree(name_buf); if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr), build_id_buf, karg.build_id_size)) return -EFAULT; if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize))) return -EFAULT; 
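/*
	 * Success path: the VMA name buffer, the build ID buffer and the
	 * (possibly size-truncated) struct procmap_query itself have all been
	 * copied back to userspace above, and the VMA/mm references were
	 * already released via query_vma_teardown() and mmput().
	 */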
return 0; out: query_vma_teardown(mm, vma); mmput(mm); kfree(name_buf); return err; } static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct seq_file *seq = file->private_data; struct proc_maps_private *priv = seq->private; switch (cmd) { case PROCMAP_QUERY: return do_procmap_query(priv, (void __user *)arg); default: return -ENOIOCTLCMD; } } const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, .unlocked_ioctl = procfs_procmap_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* * Proportional Set Size(PSS): my share of RSS. * * PSS of a process is the count of pages it has in memory, where each * page is divided by the number of processes sharing it. So if a * process has 1000 pages all to itself, and 1000 shared with one other * process, its PSS will be 1500. * * To keep (accumulated) division errors low, we adopt a 64bit * fixed-point pss counter to minimize division errors. So (pss >> * PSS_SHIFT) would be the real byte count. * * A shift of 12 before division means (assuming 4K page size): * - 1M 3-user-pages add up to 8KB errors; * - supports mapcount up to 2^24, or 16M; * - supports PSS up to 2^52 bytes, or 4PB. */ #define PSS_SHIFT 12 #ifdef CONFIG_PROC_PAGE_MONITOR struct mem_size_stats { unsigned long resident; unsigned long shared_clean; unsigned long shared_dirty; unsigned long private_clean; unsigned long private_dirty; unsigned long referenced; unsigned long anonymous; unsigned long lazyfree; unsigned long anonymous_thp; unsigned long shmem_thp; unsigned long file_thp; unsigned long swap; unsigned long shared_hugetlb; unsigned long private_hugetlb; unsigned long ksm; u64 pss; u64 pss_anon; u64 pss_file; u64 pss_shmem; u64 pss_dirty; u64 pss_locked; u64 swap_pss; }; static void smaps_page_accumulate(struct mem_size_stats *mss, struct folio *folio, unsigned long size, unsigned long pss, bool dirty, bool locked, bool private) { mss->pss += pss; if (folio_test_anon(folio)) mss->pss_anon += pss; else if (folio_test_swapbacked(folio)) mss->pss_shmem += pss; else mss->pss_file += pss; if (locked) mss->pss_locked += pss; if (dirty || folio_test_dirty(folio)) { mss->pss_dirty += pss; if (private) mss->private_dirty += size; else mss->shared_dirty += size; } else { if (private) mss->private_clean += size; else mss->shared_clean += size; } } static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked, bool present) { struct folio *folio = page_folio(page); int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; /* * First accumulate quantities that depend only on |size| and the type * of the compound page. */ if (folio_test_anon(folio)) { mss->anonymous += size; if (!folio_test_swapbacked(folio) && !dirty && !folio_test_dirty(folio)) mss->lazyfree += size; } if (folio_test_ksm(folio)) mss->ksm += size; mss->resident += size; /* Accumulate the size in pages that have been accessed. */ if (young || folio_test_young(folio) || folio_test_referenced(folio)) mss->referenced += size; /* * Then accumulate quantities that may depend on sharing, or that may * differ page-by-page. * * refcount == 1 for present entries guarantees that the folio is mapped * exactly once. For large folios this implies that exactly one * PTE/PMD/... maps (a part of) this folio. 
* * Treat all non-present entries (where relying on the mapcount and * refcount doesn't make sense) as "maybe shared, but not sure how * often". We treat device private entries as being fake-present. * * Note that it would not be safe to read the mapcount especially for * pages referenced by migration entries, even with the PTL held. */ if (folio_ref_count(folio) == 1 || !present) { smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT, dirty, locked, present); return; } /* * We obtain a snapshot of the mapcount. Without holding the folio lock * this snapshot can be slightly wrong as we cannot always read the * mapcount atomically. */ for (i = 0; i < nr; i++, page++) { int mapcount = folio_precise_page_mapcount(folio, page); unsigned long pss = PAGE_SIZE << PSS_SHIFT; if (mapcount >= 2) pss /= mapcount; smaps_page_accumulate(mss, folio, PAGE_SIZE, pss, dirty, locked, mapcount < 2); } } #ifdef CONFIG_SHMEM static int smaps_pte_hole(unsigned long addr, unsigned long end, __always_unused int depth, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping, linear_page_index(vma, addr), linear_page_index(vma, end)); return 0; } #else #define smaps_pte_hole NULL #endif /* CONFIG_SHMEM */ static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk) { #ifdef CONFIG_SHMEM if (walk->ops->pte_hole) { /* depth is not used */ smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk); } #endif } static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; bool present = false, young = false, dirty = false; pte_t ptent = ptep_get(pte); if (pte_present(ptent)) { page = vm_normal_page(vma, addr, ptent); young = pte_young(ptent); dirty = pte_dirty(ptent); present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); if (!non_swap_entry(swpent)) { int mapcount; mss->swap += PAGE_SIZE; mapcount = swp_swapcount(swpent); if (mapcount >= 2) { u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT; do_div(pss_delta, mapcount); mss->swap_pss += pss_delta; } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } } else if (is_pfn_swap_entry(swpent)) { if (is_device_private_entry(swpent)) present = true; page = pfn_swap_entry_to_page(swpent); } } else { smaps_pte_hole_lookup(addr, walk); return; } if (!page) return; smaps_account(mss, page, false, young, dirty, locked, present); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; bool present = false; struct folio *folio; if (pmd_present(*pmd)) { page = vm_normal_page_pmd(vma, addr, *pmd); present = true; } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); } if (IS_ERR_OR_NULL(page)) return; folio = page_folio(page); if (folio_test_anon(folio)) mss->anonymous_thp += HPAGE_PMD_SIZE; else if (folio_test_swapbacked(folio)) mss->shmem_thp += HPAGE_PMD_SIZE; else if (folio_is_zone_device(folio)) /* pass */; else mss->file_thp += HPAGE_PMD_SIZE; smaps_account(mss, page, true, pmd_young(*pmd), 
pmd_dirty(*pmd), locked, present); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mm_walk *walk) { } #endif static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; pte_t *pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); goto out; } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte++, addr += PAGE_SIZE) smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); out: cond_resched(); return 0; } static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) { /* * Don't forget to update Documentation/ on changes. */ static const char mnemonics[BITS_PER_LONG][2] = { /* * In case if we meet a flag we don't know about. */ [0 ... (BITS_PER_LONG-1)] = "??", [ilog2(VM_READ)] = "rd", [ilog2(VM_WRITE)] = "wr", [ilog2(VM_EXEC)] = "ex", [ilog2(VM_SHARED)] = "sh", [ilog2(VM_MAYREAD)] = "mr", [ilog2(VM_MAYWRITE)] = "mw", [ilog2(VM_MAYEXEC)] = "me", [ilog2(VM_MAYSHARE)] = "ms", [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", [ilog2(VM_RAND_READ)] = "rr", [ilog2(VM_DONTCOPY)] = "dc", [ilog2(VM_DONTEXPAND)] = "de", [ilog2(VM_LOCKONFAULT)] = "lf", [ilog2(VM_ACCOUNT)] = "ac", [ilog2(VM_NORESERVE)] = "nr", [ilog2(VM_HUGETLB)] = "ht", [ilog2(VM_SYNC)] = "sf", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_DONTDUMP)] = "dd", #ifdef CONFIG_ARM64_BTI [ilog2(VM_ARM64_BTI)] = "bt", #endif #ifdef CONFIG_MEM_SOFT_DIRTY [ilog2(VM_SOFTDIRTY)] = "sd", #endif [ilog2(VM_MIXEDMAP)] = "mm", [ilog2(VM_HUGEPAGE)] = "hg", [ilog2(VM_NOHUGEPAGE)] = "nh", [ilog2(VM_MERGEABLE)] = "mg", [ilog2(VM_UFFD_MISSING)]= "um", [ilog2(VM_UFFD_WP)] = "uw", #ifdef CONFIG_ARM64_MTE [ilog2(VM_MTE)] = "mt", [ilog2(VM_MTE_ALLOWED)] = "", #endif #ifdef CONFIG_ARCH_HAS_PKEYS /* These come out via ProtectionKey: */ [ilog2(VM_PKEY_BIT0)] = "", [ilog2(VM_PKEY_BIT1)] = "", [ilog2(VM_PKEY_BIT2)] = "", #if VM_PKEY_BIT3 [ilog2(VM_PKEY_BIT3)] = "", #endif #if VM_PKEY_BIT4 [ilog2(VM_PKEY_BIT4)] = "", #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR [ilog2(VM_UFFD_MINOR)] = "ui", #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) [ilog2(VM_DROPPABLE)] = "dp", #endif #ifdef CONFIG_64BIT [ilog2(VM_SEALED)] = "sl", #endif }; size_t i; seq_puts(m, "VmFlags: "); for (i = 0; i < BITS_PER_LONG; i++) { if (!mnemonics[i][0]) continue; if (vma->vm_flags & (1UL << i)) { seq_putc(m, mnemonics[i][0]); seq_putc(m, mnemonics[i][1]); seq_putc(m, ' '); } } seq_putc(m, '\n'); } #ifdef CONFIG_HUGETLB_PAGE static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; bool present = false; if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); if (is_pfn_swap_entry(swpent)) folio = pfn_swap_entry_folio(swpent); } if (folio) { /* We treat non-present entries as "maybe shared". 
*/ if (!present || folio_likely_mapped_shared(folio) || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } return 0; } #else #define smaps_hugetlb_range NULL #endif /* HUGETLB_PAGE */ static const struct mm_walk_ops smaps_walk_ops = { .pmd_entry = smaps_pte_range, .hugetlb_entry = smaps_hugetlb_range, .walk_lock = PGWALK_RDLOCK, }; static const struct mm_walk_ops smaps_shmem_walk_ops = { .pmd_entry = smaps_pte_range, .hugetlb_entry = smaps_hugetlb_range, .pte_hole = smaps_pte_hole, .walk_lock = PGWALK_RDLOCK, }; /* * Gather mem stats from @vma with the indicated beginning * address @start, and keep them in @mss. * * Use vm_start of @vma as the beginning address if @start is 0. */ static void smap_gather_stats(struct vm_area_struct *vma, struct mem_size_stats *mss, unsigned long start) { const struct mm_walk_ops *ops = &smaps_walk_ops; /* Invalid start */ if (start >= vma->vm_end) return; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { /* * For shared or readonly shmem mappings we know that all * swapped out pages belong to the shmem object, and we can * obtain the swap value much more efficiently. For private * writable mappings, we might have COW pages that are * not affected by the parent swapped out pages of the shmem * object, so we have to distinguish them during the page walk. * Unless we know that the shmem object (or the part mapped by * our VMA) has no swapped out pages at all. */ unsigned long shmem_swapped = shmem_swap_usage(vma); if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) || !(vma->vm_flags & VM_WRITE))) { mss->swap += shmem_swapped; } else { ops = &smaps_shmem_walk_ops; } } /* mmap_lock is held in m_start */ if (!start) walk_page_vma(vma, ops, mss); else walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss); } #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) >> 10, 8) /* Show the contents common for smaps and smaps_rollup */ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, bool rollup_mode) { SEQ_PUT_DEC("Rss: ", mss->resident); SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nPss_Dirty: ", mss->pss_dirty >> PSS_SHIFT); if (rollup_mode) { /* * These are meaningful only for smaps_rollup, otherwise two of * them are zero, and the other one is the same as Pss. 
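 * (Per smaps_page_accumulate() above, Pss_Anon + Pss_File + Pss_Shmem
 * always sums to Pss; the split only carries extra information once
 * several VMAs are rolled up together.)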
*/ SEQ_PUT_DEC(" kB\nPss_Anon: ", mss->pss_anon >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nPss_File: ", mss->pss_file >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nPss_Shmem: ", mss->pss_shmem >> PSS_SHIFT); } SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty); SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced); SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous); SEQ_PUT_DEC(" kB\nKSM: ", mss->ksm); SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7); SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); SEQ_PUT_DEC(" kB\nSwapPss: ", mss->swap_pss >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nLocked: ", mss->pss_locked >> PSS_SHIFT); seq_puts(m, " kB\n"); } static int show_smap(struct seq_file *m, void *v) { struct vm_area_struct *vma = v; struct mem_size_stats mss = {}; smap_gather_stats(vma, &mss, 0); show_map_vma(m, vma); SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start); SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma)); SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); seq_puts(m, " kB\n"); __show_smap(m, &mss, false); seq_printf(m, "THPeligible: %8u\n", !!thp_vma_allowable_orders(vma, vma->vm_flags, TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL)); if (arch_pkeys_enabled()) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); return 0; } static int show_smaps_rollup(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct mem_size_stats mss = {}; struct mm_struct *mm = priv->mm; struct vm_area_struct *vma; unsigned long vma_start = 0, last_vma_end = 0; int ret = 0; VMA_ITERATOR(vmi, mm, 0); priv->task = get_proc_task(priv->inode); if (!priv->task) return -ESRCH; if (!mm || !mmget_not_zero(mm)) { ret = -ESRCH; goto out_put_task; } ret = mmap_read_lock_killable(mm); if (ret) goto out_put_mm; hold_task_mempolicy(priv); vma = vma_next(&vmi); if (unlikely(!vma)) goto empty_set; vma_start = vma->vm_start; do { smap_gather_stats(vma, &mss, 0); last_vma_end = vma->vm_end; /* * Release mmap_lock temporarily if someone wants to * access it for write request. */ if (mmap_lock_is_contended(mm)) { vma_iter_invalidate(&vmi); mmap_read_unlock(mm); ret = mmap_read_lock_killable(mm); if (ret) { release_task_mempolicy(priv); goto out_put_mm; } /* * After dropping the lock, there are four cases to * consider. See the following example for explanation. * * +------+------+-----------+ * | VMA1 | VMA2 | VMA3 | * +------+------+-----------+ * | | | | * 4k 8k 16k 400k * * Suppose we drop the lock after reading VMA2 due to * contention, then we get: * * last_vma_end = 16k * * 1) VMA2 is freed, but VMA3 exists: * * vma_next(vmi) will return VMA3. * In this case, just continue from VMA3. * * 2) VMA2 still exists: * * vma_next(vmi) will return VMA3. * In this case, just continue from VMA3. * * 3) No more VMAs can be found: * * vma_next(vmi) will return NULL. * No more things to do, just break. * * 4) (last_vma_end - 1) is the middle of a vma (VMA'): * * vma_next(vmi) will return VMA' whose range * contains last_vma_end. * Iterate VMA' from last_vma_end. 
*/ vma = vma_next(&vmi); /* Case 3 above */ if (!vma) break; /* Case 1 and 2 above */ if (vma->vm_start >= last_vma_end) { smap_gather_stats(vma, &mss, 0); last_vma_end = vma->vm_end; continue; } /* Case 4 above */ if (vma->vm_end > last_vma_end) { smap_gather_stats(vma, &mss, last_vma_end); last_vma_end = vma->vm_end; } } } for_each_vma(vmi, vma); empty_set: show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0); seq_pad(m, ' '); seq_puts(m, "[rollup]\n"); __show_smap(m, &mss, true); release_task_mempolicy(priv); mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: put_task_struct(priv->task); priv->task = NULL; return ret; } #undef SEQ_PUT_DEC static const struct seq_operations proc_pid_smaps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_smap }; static int pid_smaps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_pid_smaps_op); } static int smaps_rollup_open(struct inode *inode, struct file *file) { int ret; struct proc_maps_private *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT); if (!priv) return -ENOMEM; ret = single_open(file, show_smaps_rollup, priv); if (ret) goto out_free; priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); if (IS_ERR(priv->mm)) { ret = PTR_ERR(priv->mm); single_release(inode, file); goto out_free; } return 0; out_free: kfree(priv); return ret; } static int smaps_rollup_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct proc_maps_private *priv = seq->private; if (priv->mm) mmdrop(priv->mm); kfree(priv); return single_release(inode, file); } const struct file_operations proc_pid_smaps_operations = { .open = pid_smaps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, }; const struct file_operations proc_pid_smaps_rollup_operations = { .open = smaps_rollup_open, .read = seq_read, .llseek = seq_lseek, .release = smaps_rollup_release, }; enum clear_refs_types { CLEAR_REFS_ALL = 1, CLEAR_REFS_ANON, CLEAR_REFS_MAPPED, CLEAR_REFS_SOFT_DIRTY, CLEAR_REFS_MM_HIWATER_RSS, CLEAR_REFS_LAST, }; struct clear_refs_private { enum clear_refs_types type; }; #ifdef CONFIG_MEM_SOFT_DIRTY static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct folio *folio; if (!pte_write(pte)) return false; if (!is_cow_mapping(vma->vm_flags)) return false; if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))) return false; folio = vm_normal_folio(vma, addr, pte); if (!folio) return false; return folio_maybe_dma_pinned(folio); } static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the * Documentation/admin-guide/mm/soft-dirty.rst for full description * of how soft-dirty works. 
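 *
 * Rough userspace flow, as a sketch only (the .rst document above is
 * authoritative): write "4" to /proc/<pid>/clear_refs to clear the
 * soft-dirty bits, let the task run, then read /proc/<pid>/pagemap and
 * test bit 55 (PM_SOFT_DIRTY) to see which pages were written since
 * the clear.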
*/ pte_t ptent = ptep_get(pte); if (pte_present(ptent)) { pte_t old_pte; if (pte_is_pinned(vma, addr, ptent)) return; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } } #else static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { } #endif #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); if (pmd_dirty(old)) pmd = pmd_mkdirty(pmd); if (pmd_young(old)) pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } } #else static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { } #endif static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; pte_t *pte, ptent; spinlock_t *ptl; struct folio *folio; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty_pmd(vma, addr, pmd); goto out; } if (!pmd_present(*pmd)) goto out; folio = pmd_folio(*pmd); /* Clear accessed and referenced bits. */ pmdp_test_and_clear_young(vma, addr, pmd); folio_test_clear_young(folio); folio_clear_referenced(folio); out: spin_unlock(ptl); return 0; } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty(vma, addr, pte); continue; } if (!pte_present(ptent)) continue; folio = vm_normal_folio(vma, addr, ptent); if (!folio) continue; /* Clear accessed and referenced bits. */ ptep_test_and_clear_young(vma, addr, pte); folio_test_clear_young(folio); folio_clear_referenced(folio); } pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; } static int clear_refs_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; if (vma->vm_flags & VM_PFNMAP) return 1; /* * Writing 1 to /proc/pid/clear_refs affects all pages. * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. * Writing 3 to /proc/pid/clear_refs only affects file mapped pages. * Writing 4 to /proc/pid/clear_refs affects all pages. 
*/ if (cp->type == CLEAR_REFS_ANON && vma->vm_file) return 1; if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file) return 1; return 0; } static const struct mm_walk_ops clear_refs_walk_ops = { .pmd_entry = clear_refs_pte_range, .test_walk = clear_refs_test_walk, .walk_lock = PGWALK_WRLOCK, }; static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF] = {}; struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; int itype; int rv; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; rv = kstrtoint(strstrip(buffer), 10, &itype); if (rv < 0) return rv; type = (enum clear_refs_types)itype; if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mm = get_task_mm(task); if (mm) { VMA_ITERATOR(vmi, mm, 0); struct mmu_notifier_range range; struct clear_refs_private cp = { .type = type, }; if (mmap_write_lock_killable(mm)) { count = -EINTR; goto out_mm; } if (type == CLEAR_REFS_MM_HIWATER_RSS) { /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); goto out_unlock; } if (type == CLEAR_REFS_SOFT_DIRTY) { for_each_vma(vmi, vma) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; vm_flags_clear(vma, VM_SOFTDIRTY); vma_set_page_prot(vma); } inc_tlb_flush_pending(mm); mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, 0, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp); if (type == CLEAR_REFS_SOFT_DIRTY) { mmu_notifier_invalidate_range_end(&range); flush_tlb_mm(mm); dec_tlb_flush_pending(mm); } out_unlock: mmap_write_unlock(mm); out_mm: mmput(mm); } put_task_struct(task); return count; } const struct file_operations proc_clear_refs_operations = { .write = clear_refs_write, .llseek = noop_llseek, }; typedef struct { u64 pme; } pagemap_entry_t; struct pagemapread { int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; bool show_pfn; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) #define PAGEMAP_WALK_MASK (PMD_MASK) #define PM_ENTRY_BYTES sizeof(pagemap_entry_t) #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) #define PM_END_OF_BUFFER 1 static inline pagemap_entry_t make_pme(u64 frame, u64 flags) { return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags }; } static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm) { pm->buffer[pm->pos++] = *pme; if (pm->pos >= pm->len) return PM_END_OF_BUFFER; return 0; } static int pagemap_pte_hole(unsigned long start, unsigned long end, __always_unused int depth, struct mm_walk *walk) { struct pagemapread *pm = walk->private; unsigned long addr = start; int err = 0; while (addr < end) { struct vm_area_struct *vma = find_vma(walk->mm, addr); pagemap_entry_t pme = make_pme(0, 0); /* End of address space hole, which we mark as non-present. */ unsigned long hole_end; if (vma) hole_end = min(end, vma->vm_start); else hole_end = end; for (; addr < hole_end; addr += PAGE_SIZE) { err = add_to_pagemap(&pme, pm); if (err) goto out; } if (!vma) break; /* Addresses in the VMA. 
*/ if (vma->vm_flags & VM_SOFTDIRTY) pme = make_pme(0, PM_SOFT_DIRTY); for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { err = add_to_pagemap(&pme, pm); if (err) goto out; } } out: return err; } static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { u64 frame = 0, flags = 0; struct page *page = NULL; struct folio *folio; if (pte_present(pte)) { if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; page = vm_normal_page(vma, addr, pte); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; } else if (is_swap_pte(pte)) { swp_entry_t entry; if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_swp_uffd_wp(pte)) flags |= PM_UFFD_WP; entry = pte_to_swp_entry(pte); if (pm->show_pfn) { pgoff_t offset; /* * For PFN swap offsets, keeping the offset field * to be PFN only to be compatible with old smaps. */ if (is_pfn_swap_entry(entry)) offset = swp_offset_pfn(entry); else offset = swp_offset(entry); frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); if (pte_marker_entry_uffd_wp(entry)) flags |= PM_UFFD_WP; } if (page) { folio = page_folio(page); if (!folio_test_anon(folio)) flags |= PM_FILE; if ((flags & PM_PRESENT) && folio_precise_page_mapcount(folio, page) == 1) flags |= PM_MMAP_EXCLUSIVE; } if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; return make_pme(frame, flags); } static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; struct pagemapread *pm = walk->private; spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; struct page *page = NULL; struct folio *folio = NULL; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; if (pmd_present(pmd)) { page = pmd_page(pmd); flags |= PM_PRESENT; if (pmd_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; if (pmd_uffd_wp(pmd)) flags |= PM_UFFD_WP; if (pm->show_pfn) frame = pmd_pfn(pmd) + idx; } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); unsigned long offset; if (pm->show_pfn) { if (is_pfn_swap_entry(entry)) offset = swp_offset_pfn(entry) + idx; else offset = swp_offset(entry) + idx; frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); page = pfn_swap_entry_to_page(entry); } #endif if (page) { folio = page_folio(page); if (!folio_test_anon(folio)) flags |= PM_FILE; } for (; addr != end; addr += PAGE_SIZE, idx++) { unsigned long cur_flags = flags; pagemap_entry_t pme; if (folio && (flags & PM_PRESENT) && folio_precise_page_mapcount(folio, page + idx) == 1) cur_flags |= PM_MMAP_EXCLUSIVE; pme = make_pme(frame, cur_flags); err = add_to_pagemap(&pme, pm); if (err) break; if (pm->show_pfn) { if (flags & PM_PRESENT) frame++; else if (flags & PM_SWAP) frame += (1 << MAX_SWAPFILES_SHIFT); } } spin_unlock(ptl); return err; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * We can assume that @vma always points to a valid one and @end never * goes beyond vma->vm_end. 
*/ orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return err; } for (; addr < end; pte++, addr += PAGE_SIZE) { pagemap_entry_t pme; pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte)); err = add_to_pagemap(&pme, pm); if (err) break; } pte_unmap_unlock(orig_pte, ptl); cond_resched(); return err; } #ifdef CONFIG_HUGETLB_PAGE /* This function walks within one hugetlb entry in the single call */ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct pagemapread *pm = walk->private; struct vm_area_struct *vma = walk->vma; u64 flags = 0, frame = 0; int err = 0; pte_t pte; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); if (!folio_test_anon(folio)) flags |= PM_FILE; if (!folio_likely_mapped_shared(folio) && !hugetlb_pmd_shared(ptep)) flags |= PM_MMAP_EXCLUSIVE; if (huge_pte_uffd_wp(pte)) flags |= PM_UFFD_WP; flags |= PM_PRESENT; if (pm->show_pfn) frame = pte_pfn(pte) + ((addr & ~hmask) >> PAGE_SHIFT); } else if (pte_swp_uffd_wp_any(pte)) { flags |= PM_UFFD_WP; } for (; addr != end; addr += PAGE_SIZE) { pagemap_entry_t pme = make_pme(frame, flags); err = add_to_pagemap(&pme, pm); if (err) return err; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; } cond_resched(); return err; } #else #define pagemap_hugetlb_range NULL #endif /* HUGETLB_PAGE */ static const struct mm_walk_ops pagemap_ops = { .pmd_entry = pagemap_pmd_range, .pte_hole = pagemap_pte_hole, .hugetlb_entry = pagemap_hugetlb_range, .walk_lock = PGWALK_RDLOCK, }; /* * /proc/pid/pagemap - an array mapping virtual pages to pfns * * For each page in the address space, this file contains one 64-bit entry * consisting of the following: * * Bits 0-54 page frame number (PFN) if present * Bits 0-4 swap type if swapped * Bits 5-54 swap offset if swapped * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) * Bit 56 page exclusively mapped * Bit 57 pte is uffd-wp write-protected * Bits 58-60 zero * Bit 61 page is file-page or shared-anon * Bit 62 page swapped * Bit 63 page present * * If the page is not present but in swap, then the PFN contains an * encoding of the swap file number and the page's offset into the * swap. Unmapped pages return a null PFN. This allows determining * precisely which pages are mapped (or in swap) and comparing mapped * pages between processes. * * Efficient users of this interface will use /proc/pid/maps to * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. 
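 *
 * Minimal userspace read, shown for illustration only (error handling is
 * omitted and pagemap_fd, page_size, vaddr, pfn, swp_type are assumed
 * helpers/variables, not part of this file):
 *
 *	uint64_t ent;
 *	off_t off = (vaddr / page_size) * sizeof(ent);
 *	pread(pagemap_fd, &ent, sizeof(ent), off);
 *	if (ent & (1ULL << 63))			// present
 *		pfn = ent & ((1ULL << 55) - 1);	// bits 0-54
 *	else if (ent & (1ULL << 62))		// swapped
 *		swp_type = ent & 0x1f;		// bits 0-4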
*/ static ssize_t pagemap_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct mm_struct *mm = file->private_data; struct pagemapread pm; unsigned long src; unsigned long svpfn; unsigned long start_vaddr; unsigned long end_vaddr; int ret = 0, copied = 0; if (!mm || !mmget_not_zero(mm)) goto out; ret = -EINVAL; /* file position must be aligned */ if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) goto out_mm; ret = 0; if (!count) goto out_mm; /* do not disclose physical addresses: attack vector */ pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN); pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL); ret = -ENOMEM; if (!pm.buffer) goto out_mm; src = *ppos; svpfn = src / PM_ENTRY_BYTES; end_vaddr = mm->task_size; /* watch out for wraparound */ start_vaddr = end_vaddr; if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) { unsigned long end; ret = mmap_read_lock_killable(mm); if (ret) goto out_free; start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT); mmap_read_unlock(mm); end = start_vaddr + ((count / PM_ENTRY_BYTES) << PAGE_SHIFT); if (end >= start_vaddr && end < mm->task_size) end_vaddr = end; } /* Ensure the address is inside the task */ if (start_vaddr > mm->task_size) start_vaddr = end_vaddr; ret = 0; while (count && (start_vaddr < end_vaddr)) { int len; unsigned long end; pm.pos = 0; end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; ret = mmap_read_lock_killable(mm); if (ret) goto out_free; ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); mmap_read_unlock(mm); start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); if (copy_to_user(buf, pm.buffer, len)) { ret = -EFAULT; goto out_free; } copied += len; buf += len; count -= len; } *ppos += copied; if (!ret || ret == PM_END_OF_BUFFER) ret = copied; out_free: kfree(pm.buffer); out_mm: mmput(mm); out: return ret; } static int pagemap_open(struct inode *inode, struct file *file) { struct mm_struct *mm; mm = proc_mem_open(inode, PTRACE_MODE_READ); if (IS_ERR(mm)) return PTR_ERR(mm); file->private_data = mm; return 0; } static int pagemap_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) mmdrop(mm); return 0; } #define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \ PAGE_IS_FILE | PAGE_IS_PRESENT | \ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY) #define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC) struct pagemap_scan_private { struct pm_scan_arg arg; unsigned long masks_of_interest, cur_vma_category; struct page_region *vec_buf; unsigned long vec_buf_len, vec_buf_index, found_pages; struct page_region __user *vec_out; }; static unsigned long pagemap_page_category(struct pagemap_scan_private *p, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { unsigned long categories = 0; if (pte_present(pte)) { struct page *page; categories |= PAGE_IS_PRESENT; if (!pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; if (p->masks_of_interest & PAGE_IS_FILE) { page = vm_normal_page(vma, addr, pte); if (page && !PageAnon(page)) categories |= PAGE_IS_FILE; } if (is_zero_pfn(pte_pfn(pte))) categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { swp_entry_t swp; categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; if (p->masks_of_interest & 
PAGE_IS_FILE) { swp = pte_to_swp_entry(pte); if (is_pfn_swap_entry(swp) && !folio_test_anon(pfn_swap_entry_folio(swp))) categories |= PAGE_IS_FILE; } if (pte_swp_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } return categories; } static void make_uffd_wp_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t ptent) { if (pte_present(ptent)) { pte_t old_pte; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_mkuffd_wp(old_pte); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_mkuffd_wp(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } else { set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); } } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) { unsigned long categories = PAGE_IS_HUGE; if (pmd_present(pmd)) { struct page *page; categories |= PAGE_IS_PRESENT; if (!pmd_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; if (p->masks_of_interest & PAGE_IS_FILE) { page = vm_normal_page_pmd(vma, addr, pmd); if (page && !PageAnon(page)) categories |= PAGE_IS_FILE; } if (is_zero_pfn(pmd_pfn(pmd))) categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pmd(pmd)) { swp_entry_t swp; categories |= PAGE_IS_SWAPPED; if (!pmd_swp_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; if (pmd_swp_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; if (p->masks_of_interest & PAGE_IS_FILE) { swp = pmd_to_swp_entry(pmd); if (is_pfn_swap_entry(swp) && !folio_test_anon(pfn_swap_entry_folio(swp))) categories |= PAGE_IS_FILE; } } return categories; } static void make_uffd_wp_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; if (pmd_present(pmd)) { old = pmdp_invalidate_ad(vma, addr, pmdp); pmd = pmd_mkuffd_wp(old); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_HUGETLB_PAGE static unsigned long pagemap_hugetlb_category(pte_t pte) { unsigned long categories = PAGE_IS_HUGE; /* * According to pagemap_hugetlb_range(), file-backed HugeTLB * page cannot be swapped. So PAGE_IS_FILE is not checked for * swapped pages. 
*/ if (pte_present(pte)) { categories |= PAGE_IS_PRESENT; if (!huge_pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; if (!PageAnon(pte_page(pte))) categories |= PAGE_IS_FILE; if (is_zero_pfn(pte_pfn(pte))) categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; if (pte_swp_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } return categories; } static void make_uffd_wp_huge_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t ptent) { unsigned long psize; if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent)) return; psize = huge_page_size(hstate_vma(vma)); if (is_hugetlb_entry_migration(ptent)) set_huge_pte_at(vma->vm_mm, addr, ptep, pte_swp_mkuffd_wp(ptent), psize); else if (!huge_pte_none(ptent)) huge_ptep_modify_prot_commit(vma, addr, ptep, ptent, huge_pte_mkuffd_wp(ptent)); else set_huge_pte_at(vma->vm_mm, addr, ptep, make_pte_marker(PTE_MARKER_UFFD_WP), psize); } #endif /* CONFIG_HUGETLB_PAGE */ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static void pagemap_scan_backout_range(struct pagemap_scan_private *p, unsigned long addr, unsigned long end) { struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index]; if (cur_buf->start != addr) cur_buf->end = addr; else cur_buf->start = cur_buf->end = 0; p->found_pages -= (end - addr) / PAGE_SIZE; } #endif static bool pagemap_scan_is_interesting_page(unsigned long categories, const struct pagemap_scan_private *p) { categories ^= p->arg.category_inverted; if ((categories & p->arg.category_mask) != p->arg.category_mask) return false; if (p->arg.category_anyof_mask && !(categories & p->arg.category_anyof_mask)) return false; return true; } static bool pagemap_scan_is_interesting_vma(unsigned long categories, const struct pagemap_scan_private *p) { unsigned long required = p->arg.category_mask & PAGE_IS_WPALLOWED; categories ^= p->arg.category_inverted; if ((categories & required) != required) return false; return true; } static int pagemap_scan_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long vma_category = 0; bool wp_allowed = userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma); if (!wp_allowed) { /* User requested explicit failure over wp-async capability */ if (p->arg.flags & PM_SCAN_CHECK_WPASYNC) return -EPERM; /* * User requires wr-protect, and allows silently skipping * unsupported vmas. */ if (p->arg.flags & PM_SCAN_WP_MATCHING) return 1; /* * Then the request doesn't involve wr-protects at all, * fall through to the rest checks, and allow vma walk. */ } if (vma->vm_flags & VM_PFNMAP) return 1; if (wp_allowed) vma_category |= PAGE_IS_WPALLOWED; if (vma->vm_flags & VM_SOFTDIRTY) vma_category |= PAGE_IS_SOFT_DIRTY; if (!pagemap_scan_is_interesting_vma(vma_category, p)) return 1; p->cur_vma_category = vma_category; return 0; } static bool pagemap_scan_push_range(unsigned long categories, struct pagemap_scan_private *p, unsigned long addr, unsigned long end) { struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index]; /* * When there is no output buffer provided at all, the sentinel values * won't match here. There is no other way for `cur_buf->end` to be * non-zero other than it being non-empty. 
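 *
 * Example (illustration only): two consecutive 4 KiB pages at 0x1000 and
 * 0x2000 reported with identical categories end up merged into a single
 * page_region covering [0x1000, 0x3000) instead of two entries.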
*/ if (addr == cur_buf->end && categories == cur_buf->categories) { cur_buf->end = end; return true; } if (cur_buf->end) { if (p->vec_buf_index >= p->vec_buf_len - 1) return false; cur_buf = &p->vec_buf[++p->vec_buf_index]; } cur_buf->start = addr; cur_buf->end = end; cur_buf->categories = categories; return true; } static int pagemap_scan_output(unsigned long categories, struct pagemap_scan_private *p, unsigned long addr, unsigned long *end) { unsigned long n_pages, total_pages; int ret = 0; if (!p->vec_buf) return 0; categories &= p->arg.return_mask; n_pages = (*end - addr) / PAGE_SIZE; if (check_add_overflow(p->found_pages, n_pages, &total_pages) || total_pages > p->arg.max_pages) { size_t n_too_much = total_pages - p->arg.max_pages; *end -= n_too_much * PAGE_SIZE; n_pages -= n_too_much; ret = -ENOSPC; } if (!pagemap_scan_push_range(categories, p, addr, *end)) { *end = addr; n_pages = 0; ret = -ENOSPC; } p->found_pages += n_pages; if (ret) p->arg.walk_end = *end; return ret; } static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long categories; spinlock_t *ptl; int ret = 0; ptl = pmd_trans_huge_lock(pmd, vma); if (!ptl) return -ENOENT; categories = p->cur_vma_category | pagemap_thp_category(p, vma, start, *pmd); if (!pagemap_scan_is_interesting_page(categories, p)) goto out_unlock; ret = pagemap_scan_output(categories, p, start, &end); if (start == end) goto out_unlock; if (~p->arg.flags & PM_SCAN_WP_MATCHING) goto out_unlock; if (~categories & PAGE_IS_WRITTEN) goto out_unlock; /* * Break huge page into small pages if the WP operation * needs to be performed on a portion of the huge page. 
*/ if (end != start + HPAGE_SIZE) { spin_unlock(ptl); split_huge_pmd(vma, pmd, start); pagemap_scan_backout_range(p, start, end); /* Report as if there was no THP */ return -ENOENT; } make_uffd_wp_pmd(vma, start, pmd); flush_tlb_range(vma, start, end); out_unlock: spin_unlock(ptl); return ret; #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ return -ENOENT; #endif } static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long addr, flush_end = 0; pte_t *pte, *start_pte; spinlock_t *ptl; int ret; arch_enter_lazy_mmu_mode(); ret = pagemap_scan_thp_entry(pmd, start, end, walk); if (ret != -ENOENT) { arch_leave_lazy_mmu_mode(); return ret; } ret = 0; start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl); if (!pte) { arch_leave_lazy_mmu_mode(); walk->action = ACTION_AGAIN; return 0; } if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { pte_t ptent = ptep_get(pte); if ((pte_present(ptent) && pte_uffd_wp(ptent)) || pte_swp_uffd_wp_any(ptent)) continue; make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = addr + PAGE_SIZE; } goto flush_and_return; } if (!p->arg.category_anyof_mask && !p->arg.category_inverted && p->arg.category_mask == PAGE_IS_WRITTEN && p->arg.return_mask == PAGE_IS_WRITTEN) { for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { unsigned long next = addr + PAGE_SIZE; pte_t ptent = ptep_get(pte); if ((pte_present(ptent) && pte_uffd_wp(ptent)) || pte_swp_uffd_wp_any(ptent)) continue; ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN, p, addr, &next); if (next == addr) break; if (~p->arg.flags & PM_SCAN_WP_MATCHING) continue; make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; } goto flush_and_return; } for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { pte_t ptent = ptep_get(pte); unsigned long categories = p->cur_vma_category | pagemap_page_category(p, vma, addr, ptent); unsigned long next = addr + PAGE_SIZE; if (!pagemap_scan_is_interesting_page(categories, p)) continue; ret = pagemap_scan_output(categories, p, addr, &next); if (next == addr) break; if (~p->arg.flags & PM_SCAN_WP_MATCHING) continue; if (~categories & PAGE_IS_WRITTEN) continue; make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; } flush_and_return: if (flush_end) flush_tlb_range(vma, start, addr); pte_unmap_unlock(start_pte, ptl); arch_leave_lazy_mmu_mode(); cond_resched(); return ret; } #ifdef CONFIG_HUGETLB_PAGE static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask, unsigned long start, unsigned long end, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long categories; spinlock_t *ptl; int ret = 0; pte_t pte; if (~p->arg.flags & PM_SCAN_WP_MATCHING) { /* Go the short route when not write-protecting pages. 
*/ pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) return 0; return pagemap_scan_output(categories, p, start, &end); } i_mmap_lock_write(vma->vm_file->f_mapping); ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep); pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) goto out_unlock; ret = pagemap_scan_output(categories, p, start, &end); if (start == end) goto out_unlock; if (~categories & PAGE_IS_WRITTEN) goto out_unlock; if (end != start + HPAGE_SIZE) { /* Partial HugeTLB page WP isn't possible. */ pagemap_scan_backout_range(p, start, end); p->arg.walk_end = start; ret = 0; goto out_unlock; } make_uffd_wp_huge_pte(vma, start, ptep, pte); flush_hugetlb_tlb_range(vma, start, end); out_unlock: spin_unlock(ptl); i_mmap_unlock_write(vma->vm_file->f_mapping); return ret; } #else #define pagemap_scan_hugetlb_entry NULL #endif static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end, int depth, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; int ret, err; if (!vma || !pagemap_scan_is_interesting_page(p->cur_vma_category, p)) return 0; ret = pagemap_scan_output(p->cur_vma_category, p, addr, &end); if (addr == end) return ret; if (~p->arg.flags & PM_SCAN_WP_MATCHING) return ret; err = uffd_wp_range(vma, addr, end - addr, true); if (err < 0) ret = err; return ret; } static const struct mm_walk_ops pagemap_scan_ops = { .test_walk = pagemap_scan_test_walk, .pmd_entry = pagemap_scan_pmd_entry, .pte_hole = pagemap_scan_pte_hole, .hugetlb_entry = pagemap_scan_hugetlb_entry, }; static int pagemap_scan_get_args(struct pm_scan_arg *arg, unsigned long uarg) { if (copy_from_user(arg, (void __user *)uarg, sizeof(*arg))) return -EFAULT; if (arg->size != sizeof(struct pm_scan_arg)) return -EINVAL; /* Validate requested features */ if (arg->flags & ~PM_SCAN_FLAGS) return -EINVAL; if ((arg->category_inverted | arg->category_mask | arg->category_anyof_mask | arg->return_mask) & ~PM_SCAN_CATEGORIES) return -EINVAL; arg->start = untagged_addr((unsigned long)arg->start); arg->end = untagged_addr((unsigned long)arg->end); arg->vec = untagged_addr((unsigned long)arg->vec); /* Validate memory pointers */ if (!IS_ALIGNED(arg->start, PAGE_SIZE)) return -EINVAL; if (!access_ok((void __user *)(long)arg->start, arg->end - arg->start)) return -EFAULT; if (!arg->vec && arg->vec_len) return -EINVAL; if (arg->vec && !access_ok((void __user *)(long)arg->vec, arg->vec_len * sizeof(struct page_region))) return -EFAULT; /* Fixup default values */ arg->end = ALIGN(arg->end, PAGE_SIZE); arg->walk_end = 0; if (!arg->max_pages) arg->max_pages = ULONG_MAX; return 0; } static int pagemap_scan_writeback_args(struct pm_scan_arg *arg, unsigned long uargl) { struct pm_scan_arg __user *uarg = (void __user *)uargl; if (copy_to_user(&uarg->walk_end, &arg->walk_end, sizeof(arg->walk_end))) return -EFAULT; return 0; } static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p) { if (!p->arg.vec_len) return 0; p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT, p->arg.vec_len); p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf), GFP_KERNEL); if (!p->vec_buf) return -ENOMEM; p->vec_buf->start = p->vec_buf->end = 0; p->vec_out = (struct page_region __user *)(long)p->arg.vec; return 0; } static long 
pagemap_scan_flush_buffer(struct pagemap_scan_private *p) { const struct page_region *buf = p->vec_buf; long n = p->vec_buf_index; if (!p->vec_buf) return 0; if (buf[n].end != buf[n].start) n++; if (!n) return 0; if (copy_to_user(p->vec_out, buf, n * sizeof(*buf))) return -EFAULT; p->arg.vec_len -= n; p->vec_out += n; p->vec_buf_index = 0; p->vec_buf_len = min_t(size_t, p->vec_buf_len, p->arg.vec_len); p->vec_buf->start = p->vec_buf->end = 0; return n; } static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg) { struct pagemap_scan_private p = {0}; unsigned long walk_start; size_t n_ranges_out = 0; int ret; ret = pagemap_scan_get_args(&p.arg, uarg); if (ret) return ret; p.masks_of_interest = p.arg.category_mask | p.arg.category_anyof_mask | p.arg.return_mask; ret = pagemap_scan_init_bounce_buffer(&p); if (ret) return ret; for (walk_start = p.arg.start; walk_start < p.arg.end; walk_start = p.arg.walk_end) { struct mmu_notifier_range range; long n_out; if (fatal_signal_pending(current)) { ret = -EINTR; break; } ret = mmap_read_lock_killable(mm); if (ret) break; /* Protection change for the range is going to happen. */ if (p.arg.flags & PM_SCAN_WP_MATCHING) { mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0, mm, walk_start, p.arg.end); mmu_notifier_invalidate_range_start(&range); } ret = walk_page_range(mm, walk_start, p.arg.end, &pagemap_scan_ops, &p); if (p.arg.flags & PM_SCAN_WP_MATCHING) mmu_notifier_invalidate_range_end(&range); mmap_read_unlock(mm); n_out = pagemap_scan_flush_buffer(&p); if (n_out < 0) ret = n_out; else n_ranges_out += n_out; if (ret != -ENOSPC) break; if (p.arg.vec_len == 0 || p.found_pages == p.arg.max_pages) break; } /* ENOSPC signifies early stop (buffer full) from the walk. */ if (!ret || ret == -ENOSPC) ret = n_ranges_out; /* The walk_end isn't set when ret is zero */ if (!p.arg.walk_end) p.arg.walk_end = p.arg.end; if (pagemap_scan_writeback_args(&p.arg, uarg)) ret = -EFAULT; kfree(p.vec_buf); return ret; } static long do_pagemap_cmd(struct file *file, unsigned int cmd, unsigned long arg) { struct mm_struct *mm = file->private_data; switch (cmd) { case PAGEMAP_SCAN: return do_pagemap_scan(mm, arg); default: return -EINVAL; } } const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, .open = pagemap_open, .release = pagemap_release, .unlocked_ioctl = do_pagemap_cmd, .compat_ioctl = do_pagemap_cmd, }; #endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA struct numa_maps { unsigned long pages; unsigned long anon; unsigned long active; unsigned long writeback; unsigned long mapcount_max; unsigned long dirty; unsigned long swapcache; unsigned long node[MAX_NUMNODES]; }; struct numa_maps_private { struct proc_maps_private proc_maps; struct numa_maps md; }; static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, unsigned long nr_pages) { struct folio *folio = page_folio(page); int count = folio_precise_page_mapcount(folio, page); md->pages += nr_pages; if (pte_dirty || folio_test_dirty(folio)) md->dirty += nr_pages; if (folio_test_swapcache(folio)) md->swapcache += nr_pages; if (folio_test_active(folio) || folio_test_unevictable(folio)) md->active += nr_pages; if (folio_test_writeback(folio)) md->writeback += nr_pages; if (folio_test_anon(folio)) md->anon += nr_pages; if (count > md->mapcount_max) md->mapcount_max = count; md->node[folio_nid(folio)] += nr_pages; } static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, unsigned long 
addr) { struct page *page; int nid; if (!pte_present(pte)) return NULL; page = vm_normal_page(vma, addr, pte); if (!page || is_zone_device_page(page)) return NULL; if (PageReserved(page)) return NULL; nid = page_to_nid(page); if (!node_isset(nid, node_states[N_MEMORY])) return NULL; return page; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static struct page *can_gather_numa_stats_pmd(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr) { struct page *page; int nid; if (!pmd_present(pmd)) return NULL; page = vm_normal_page_pmd(vma, addr, pmd); if (!page) return NULL; if (PageReserved(page)) return NULL; nid = page_to_nid(page); if (!node_isset(nid, node_states[N_MEMORY])) return NULL; return page; } #endif static int gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct numa_maps *md = walk->private; struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; pte_t *orig_pte; pte_t *pte; #ifdef CONFIG_TRANSPARENT_HUGEPAGE ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { struct page *page; page = can_gather_numa_stats_pmd(*pmd, vma, addr); if (page) gather_stats(page, md, pmd_dirty(*pmd), HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(ptl); return 0; } #endif orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } do { pte_t ptent = ptep_get(pte); struct page *page = can_gather_numa_stats(ptent, vma, addr); if (!page) continue; gather_stats(page, md, pte_dirty(ptent), 1); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); cond_resched(); return 0; } #ifdef CONFIG_HUGETLB_PAGE static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); struct numa_maps *md; struct page *page; if (!pte_present(huge_pte)) return 0; page = pte_page(huge_pte); md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); return 0; } #else static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { return 0; } #endif static const struct mm_walk_ops show_numa_ops = { .hugetlb_entry = gather_hugetlb_stats, .pmd_entry = gather_pte_stats, .walk_lock = PGWALK_RDLOCK, }; /* * Display pages allocated per node and memory policy via /proc. */ static int show_numa_map(struct seq_file *m, void *v) { struct numa_maps_private *numa_priv = m->private; struct proc_maps_private *proc_priv = &numa_priv->proc_maps; struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; char buffer[64]; struct mempolicy *pol; pgoff_t ilx; int nid; if (!mm) return 0; /* Ensure we start with an empty set of numa_maps statistics. 
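 *
 * The line emitted below ends up looking roughly like this (all values
 * invented for illustration):
 *	7f1c38a00000 default file=/usr/lib/libc.so.6 mapped=1220 mapmax=42
 *	N0=800 N1=420 kernelpagesize_kB=4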
*/ memset(md, 0, sizeof(*md)); pol = __get_vma_policy(vma, vma->vm_start, &ilx); if (pol) { mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); } else { mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy); } seq_printf(m, "%08lx %s", vma->vm_start, buffer); if (file) { seq_puts(m, " file="); seq_path(m, file_user_path(file), "\n\t= "); } else if (vma_is_initial_heap(vma)) { seq_puts(m, " heap"); } else if (vma_is_initial_stack(vma)) { seq_puts(m, " stack"); } if (is_vm_hugetlb_page(vma)) seq_puts(m, " huge"); /* mmap_lock is held by m_start */ walk_page_vma(vma, &show_numa_ops, md); if (!md->pages) goto out; if (md->anon) seq_printf(m, " anon=%lu", md->anon); if (md->dirty) seq_printf(m, " dirty=%lu", md->dirty); if (md->pages != md->anon && md->pages != md->dirty) seq_printf(m, " mapped=%lu", md->pages); if (md->mapcount_max > 1) seq_printf(m, " mapmax=%lu", md->mapcount_max); if (md->swapcache) seq_printf(m, " swapcache=%lu", md->swapcache); if (md->active < md->pages && !is_vm_hugetlb_page(vma)) seq_printf(m, " active=%lu", md->active); if (md->writeback) seq_printf(m, " writeback=%lu", md->writeback); for_each_node_state(nid, N_MEMORY) if (md->node[nid]) seq_printf(m, " N%d=%lu", nid, md->node[nid]); seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10); out: seq_putc(m, '\n'); return 0; } static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_numa_map, }; static int pid_numa_maps_open(struct inode *inode, struct file *file) { return proc_maps_open(inode, file, &proc_pid_numa_maps_op, sizeof(struct numa_maps_private)); } const struct file_operations proc_pid_numa_maps_operations = { .open = pid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, }; #endif /* CONFIG_NUMA */ |
/* CoreChip-sz SR9800 one chip USB 2.0 Ethernet Devices * * Author : Liu Junliang <liujunliang_ljl@163.com> * * Based on asix_common.c,
asix_devices.c * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied.* */ #include <linux/module.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> #include <linux/if_vlan.h> #include "sr9800.h" static int sr_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { int err; err = usbnet_read_cmd(dev, cmd, SR_REQ_RD_REG, value, index, data, size); if ((err != size) && (err >= 0)) err = -EINVAL; return err; } static int sr_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { int err; err = usbnet_write_cmd(dev, cmd, SR_REQ_WR_REG, value, index, data, size); if ((err != size) && (err >= 0)) err = -EINVAL; return err; } static void sr_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { usbnet_write_cmd_async(dev, cmd, SR_REQ_WR_REG, value, index, data, size); } static int sr_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { int offset = 0; /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) return 0; while (offset + sizeof(u32) < skb->len) { struct sk_buff *sr_skb; u16 size; u32 header = get_unaligned_le32(skb->data + offset); offset += sizeof(u32); /* get the packet length */ size = (u16) (header & 0x7ff); if (size != ((~header >> 16) & 0x07ff)) { netdev_err(dev->net, "%s : Bad Header Length\n", __func__); return 0; } if ((size > dev->net->mtu + ETH_HLEN + VLAN_HLEN) || (size + offset > skb->len)) { netdev_err(dev->net, "%s : Bad RX Length %d\n", __func__, size); return 0; } sr_skb = netdev_alloc_skb_ip_align(dev->net, size); if (!sr_skb) return 0; skb_put(sr_skb, size); memcpy(sr_skb->data, skb->data + offset, size); usbnet_skb_return(dev, sr_skb); offset += (size + 1) & 0xfffe; } if (skb->len != offset) { netdev_err(dev->net, "%s : Bad SKB Length %d\n", __func__, skb->len); return 0; } return 1; } static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int headroom = skb_headroom(skb); int tailroom = skb_tailroom(skb); u32 padbytes = 0xffff0000; u32 packet_len; int padlen; void *ptr; padlen = ((skb->len + 4) % (dev->maxpacket - 1)) ? 
0 : 4; if ((!skb_cloned(skb)) && ((headroom + tailroom) >= (4 + padlen))) { if ((headroom < 4) || (tailroom < padlen)) { skb->data = memmove(skb->head + 4, skb->data, skb->len); skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; skb2 = skb_copy_expand(skb, 4, padlen, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) return NULL; } ptr = skb_push(skb, 4); packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4); put_unaligned_le32(packet_len, ptr); if (padlen) { put_unaligned_le32(padbytes, skb_tail_pointer(skb)); skb_put(skb, sizeof(padbytes)); } usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } static void sr_status(struct usbnet *dev, struct urb *urb) { struct sr9800_int_data *event; int link; if (urb->actual_length < 8) return; event = urb->transfer_buffer; link = event->link & 0x01; if (netif_carrier_ok(dev->net) != link) { usbnet_link_change(dev, link, 1); netdev_dbg(dev->net, "Link Status is: %d\n", link); } return; } static inline int sr_set_sw_mii(struct usbnet *dev) { int ret; ret = sr_write_cmd(dev, SR_CMD_SET_SW_MII, 0x0000, 0, 0, NULL); if (ret < 0) netdev_err(dev->net, "Failed to enable software MII access\n"); return ret; } static inline int sr_set_hw_mii(struct usbnet *dev) { int ret; ret = sr_write_cmd(dev, SR_CMD_SET_HW_MII, 0x0000, 0, 0, NULL); if (ret < 0) netdev_err(dev->net, "Failed to enable hardware MII access\n"); return ret; } static inline int sr_get_phy_addr(struct usbnet *dev) { u8 buf[2]; int ret; ret = sr_read_cmd(dev, SR_CMD_READ_PHY_ID, 0, 0, 2, buf); if (ret < 0) { netdev_err(dev->net, "%s : Error reading PHYID register:%02x\n", __func__, ret); goto out; } netdev_dbg(dev->net, "%s : returning 0x%04x\n", __func__, *((__le16 *)buf)); ret = buf[1]; out: return ret; } static int sr_sw_reset(struct usbnet *dev, u8 flags) { int ret; ret = sr_write_cmd(dev, SR_CMD_SW_RESET, flags, 0, 0, NULL); if (ret < 0) netdev_err(dev->net, "Failed to send software reset:%02x\n", ret); return ret; } static u16 sr_read_rx_ctl(struct usbnet *dev) { __le16 v; int ret; ret = sr_read_cmd(dev, SR_CMD_READ_RX_CTL, 0, 0, 2, &v); if (ret < 0) { netdev_err(dev->net, "Error reading RX_CTL register:%02x\n", ret); goto out; } ret = le16_to_cpu(v); out: return ret; } static int sr_write_rx_ctl(struct usbnet *dev, u16 mode) { int ret; netdev_dbg(dev->net, "%s : mode = 0x%04x\n", __func__, mode); ret = sr_write_cmd(dev, SR_CMD_WRITE_RX_CTL, mode, 0, 0, NULL); if (ret < 0) netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x:%02x\n", mode, ret); return ret; } static u16 sr_read_medium_status(struct usbnet *dev) { __le16 v; int ret; ret = sr_read_cmd(dev, SR_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v); if (ret < 0) { netdev_err(dev->net, "Error reading Medium Status register:%02x\n", ret); return ret; /* TODO: callers not checking for error ret */ } return le16_to_cpu(v); } static int sr_write_medium_mode(struct usbnet *dev, u16 mode) { int ret; netdev_dbg(dev->net, "%s : mode = 0x%04x\n", __func__, mode); ret = sr_write_cmd(dev, SR_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL); if (ret < 0) netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x:%02x\n", mode, ret); return ret; } static int sr_write_gpio(struct usbnet *dev, u16 value, int sleep) { int ret; netdev_dbg(dev->net, "%s : value = 0x%04x\n", __func__, value); ret = sr_write_cmd(dev, SR_CMD_WRITE_GPIOS, value, 0, 0, NULL); if (ret < 0) netdev_err(dev->net, "Failed to write GPIO value 0x%04x:%02x\n", value, ret); if (sleep) msleep(sleep); return ret; } /* SR9800 have a 16-bit RX_CTL value */ static 
void sr_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct sr_data *data = (struct sr_data *)&dev->data; u16 rx_ctl = SR_DEFAULT_RX_CTL; if (net->flags & IFF_PROMISC) { rx_ctl |= SR_RX_CTL_PRO; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > SR_MAX_MCAST) { rx_ctl |= SR_RX_CTL_AMALL; } else if (netdev_mc_empty(net)) { /* just broadcast and directed */ } else { /* We use the 20 byte dev->data * for our 8 byte filter buffer * to avoid allocating memory that * is tricky to free later */ struct netdev_hw_addr *ha; u32 crc_bits; memset(data->multi_filter, 0, SR_MCAST_FILTER_SIZE); /* Build the multicast hash filter. */ netdev_for_each_mc_addr(ha, net) { crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26; data->multi_filter[crc_bits >> 3] |= 1 << (crc_bits & 7); } sr_write_cmd_async(dev, SR_CMD_WRITE_MULTI_FILTER, 0, 0, SR_MCAST_FILTER_SIZE, data->multi_filter); rx_ctl |= SR_RX_CTL_AM; } sr_write_cmd_async(dev, SR_CMD_WRITE_RX_CTL, rx_ctl, 0, 0, NULL); } static int sr_mdio_read(struct net_device *net, int phy_id, int loc) { struct usbnet *dev = netdev_priv(net); __le16 res = 0; mutex_lock(&dev->phy_mutex); sr_set_sw_mii(dev); sr_read_cmd(dev, SR_CMD_READ_MII_REG, phy_id, (__u16)loc, 2, &res); sr_set_hw_mii(dev); mutex_unlock(&dev->phy_mutex); netdev_dbg(dev->net, "%s : phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", __func__, phy_id, loc, le16_to_cpu(res)); return le16_to_cpu(res); } static void sr_mdio_write(struct net_device *net, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(net); __le16 res = cpu_to_le16(val); netdev_dbg(dev->net, "%s : phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", __func__, phy_id, loc, val); mutex_lock(&dev->phy_mutex); sr_set_sw_mii(dev); sr_write_cmd(dev, SR_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res); sr_set_hw_mii(dev); mutex_unlock(&dev->phy_mutex); } /* Get the PHY Identifier from the PHYSID1 & PHYSID2 MII registers */ static u32 sr_get_phyid(struct usbnet *dev) { int phy_reg; u32 phy_id; int i; /* Poll for the rare case the FW or phy isn't ready yet. 
*/ for (i = 0; i < 100; i++) { phy_reg = sr_mdio_read(dev->net, dev->mii.phy_id, MII_PHYSID1); if (phy_reg != 0 && phy_reg != 0xFFFF) break; mdelay(1); } if (phy_reg <= 0 || phy_reg == 0xFFFF) return 0; phy_id = (phy_reg & 0xffff) << 16; phy_reg = sr_mdio_read(dev->net, dev->mii.phy_id, MII_PHYSID2); if (phy_reg < 0) return 0; phy_id |= (phy_reg & 0xffff); return phy_id; } static void sr_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); u8 opt; if (sr_read_cmd(dev, SR_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt) < 0) { wolinfo->supported = 0; wolinfo->wolopts = 0; return; } wolinfo->supported = WAKE_PHY | WAKE_MAGIC; wolinfo->wolopts = 0; if (opt & SR_MONITOR_LINK) wolinfo->wolopts |= WAKE_PHY; if (opt & SR_MONITOR_MAGIC) wolinfo->wolopts |= WAKE_MAGIC; } static int sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); u8 opt = 0; if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) return -EINVAL; if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) opt |= SR_MONITOR_MAGIC; if (sr_write_cmd(dev, SR_CMD_WRITE_MONITOR_MODE, opt, 0, 0, NULL) < 0) return -EINVAL; return 0; } static int sr_get_eeprom_len(struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct sr_data *data = (struct sr_data *)&dev->data; return data->eeprom_len; } static int sr_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 *data) { struct usbnet *dev = netdev_priv(net); __le16 *ebuf = (__le16 *)data; int ret; int i; /* Crude hack to ensure that we don't overwrite memory * if an odd length is supplied */ if (eeprom->len % 2) return -EINVAL; eeprom->magic = SR_EEPROM_MAGIC; /* sr9800 returns 2 bytes from eeprom on read */ for (i = 0; i < eeprom->len / 2; i++) { ret = sr_read_cmd(dev, SR_CMD_READ_EEPROM, eeprom->offset + i, 0, 2, &ebuf[i]); if (ret < 0) return -EINVAL; } return 0; } static void sr_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { /* Inherit standard device info */ usbnet_get_drvinfo(net, info); strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); strscpy(info->version, DRIVER_VERSION, sizeof(info->version)); } static u32 sr_get_link(struct net_device *net) { struct usbnet *dev = netdev_priv(net); return mii_link_ok(&dev->mii); } static int sr_ioctl(struct net_device *net, struct ifreq *rq, int cmd) { struct usbnet *dev = netdev_priv(net); return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL); } static int sr_set_mac_address(struct net_device *net, void *p) { struct usbnet *dev = netdev_priv(net); struct sr_data *data = (struct sr_data *)&dev->data; struct sockaddr *addr = p; if (netif_running(net)) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; eth_hw_addr_set(net, addr->sa_data); /* We use the 20 byte dev->data * for our 6 byte mac buffer * to avoid allocating memory that * is tricky to free later */ memcpy(data->mac_addr, addr->sa_data, ETH_ALEN); sr_write_cmd_async(dev, SR_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, data->mac_addr); return 0; } static const struct ethtool_ops sr9800_ethtool_ops = { .get_drvinfo = sr_get_drvinfo, .get_link = sr_get_link, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_wol = sr_get_wol, .set_wol = sr_set_wol, .get_eeprom_len = sr_get_eeprom_len, .get_eeprom = sr_get_eeprom, .nway_reset = usbnet_nway_reset, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static int 
sr9800_link_reset(struct usbnet *dev) { struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; u16 mode; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); mode = SR9800_MEDIUM_DEFAULT; if (ethtool_cmd_speed(&ecmd) != SPEED_100) mode &= ~SR_MEDIUM_PS; if (ecmd.duplex != DUPLEX_FULL) mode &= ~SR_MEDIUM_FD; netdev_dbg(dev->net, "%s : speed: %u duplex: %d mode: 0x%04x\n", __func__, ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); sr_write_medium_mode(dev, mode); return 0; } static int sr9800_set_default_mode(struct usbnet *dev) { u16 rx_ctl; int ret; sr_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); sr_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA); mii_nway_restart(&dev->mii); ret = sr_write_medium_mode(dev, SR9800_MEDIUM_DEFAULT); if (ret < 0) goto out; ret = sr_write_cmd(dev, SR_CMD_WRITE_IPG012, SR9800_IPG0_DEFAULT | SR9800_IPG1_DEFAULT, SR9800_IPG2_DEFAULT, 0, NULL); if (ret < 0) { netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); goto out; } /* Set RX_CTL to default values with 2k buffer, and enable cactus */ ret = sr_write_rx_ctl(dev, SR_DEFAULT_RX_CTL); if (ret < 0) goto out; rx_ctl = sr_read_rx_ctl(dev); netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", rx_ctl); rx_ctl = sr_read_medium_status(dev); netdev_dbg(dev->net, "Medium Status:0x%04x after all initializations\n", rx_ctl); return 0; out: return ret; } static int sr9800_reset(struct usbnet *dev) { struct sr_data *data = (struct sr_data *)&dev->data; int ret, embd_phy; u16 rx_ctl; ret = sr_write_gpio(dev, SR_GPIO_RSE | SR_GPIO_GPO_2 | SR_GPIO_GPO2EN, 5); if (ret < 0) goto out; embd_phy = ((sr_get_phy_addr(dev) & 0x1f) == 0x10 ? 1 : 0); ret = sr_write_cmd(dev, SR_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL); if (ret < 0) { netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); goto out; } ret = sr_sw_reset(dev, SR_SWRESET_IPPD | SR_SWRESET_PRL); if (ret < 0) goto out; msleep(150); ret = sr_sw_reset(dev, SR_SWRESET_CLEAR); if (ret < 0) goto out; msleep(150); if (embd_phy) { ret = sr_sw_reset(dev, SR_SWRESET_IPRL); if (ret < 0) goto out; } else { ret = sr_sw_reset(dev, SR_SWRESET_PRTE); if (ret < 0) goto out; } msleep(150); rx_ctl = sr_read_rx_ctl(dev); netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl); ret = sr_write_rx_ctl(dev, 0x0000); if (ret < 0) goto out; rx_ctl = sr_read_rx_ctl(dev); netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl); ret = sr_sw_reset(dev, SR_SWRESET_PRL); if (ret < 0) goto out; msleep(150); ret = sr_sw_reset(dev, SR_SWRESET_IPRL | SR_SWRESET_PRL); if (ret < 0) goto out; msleep(150); ret = sr9800_set_default_mode(dev); if (ret < 0) goto out; /* Rewrite MAC address */ memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); ret = sr_write_cmd(dev, SR_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, data->mac_addr); if (ret < 0) goto out; return 0; out: return ret; } static const struct net_device_ops sr9800_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = sr_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_eth_ioctl = sr_ioctl, .ndo_set_rx_mode = sr_set_multicast, }; static int sr9800_phy_powerup(struct usbnet *dev) { int ret; /* set the embedded Ethernet PHY in power-down state */ ret = sr_sw_reset(dev, SR_SWRESET_IPPD | SR_SWRESET_IPRL); if (ret < 0) { netdev_err(dev->net, "Failed to power down 
PHY : %d\n", ret); return ret; } msleep(20); /* set the embedded Ethernet PHY in power-up state */ ret = sr_sw_reset(dev, SR_SWRESET_IPRL); if (ret < 0) { netdev_err(dev->net, "Failed to reset PHY: %d\n", ret); return ret; } msleep(600); /* set the embedded Ethernet PHY in reset state */ ret = sr_sw_reset(dev, SR_SWRESET_CLEAR); if (ret < 0) { netdev_err(dev->net, "Failed to power up PHY: %d\n", ret); return ret; } msleep(20); /* set the embedded Ethernet PHY in power-up state */ ret = sr_sw_reset(dev, SR_SWRESET_IPRL); if (ret < 0) { netdev_err(dev->net, "Failed to reset PHY: %d\n", ret); return ret; } return 0; } static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) { struct sr_data *data = (struct sr_data *)&dev->data; u16 led01_mux, led23_mux; int ret, embd_phy; u8 addr[ETH_ALEN]; u32 phyid; u16 rx_ctl; data->eeprom_len = SR9800_EEPROM_LEN; ret = usbnet_get_endpoints(dev, intf); if (ret) goto out; /* LED Setting Rule : * AABB:CCDD * AA : MFA0(LED0) * BB : MFA1(LED1) * CC : MFA2(LED2), Reserved for SR9800 * DD : MFA3(LED3), Reserved for SR9800 */ led01_mux = (SR_LED_MUX_LINK_ACTIVE << 8) | SR_LED_MUX_LINK; led23_mux = (SR_LED_MUX_LINK_ACTIVE << 8) | SR_LED_MUX_TX_ACTIVE; ret = sr_write_cmd(dev, SR_CMD_LED_MUX, led01_mux, led23_mux, 0, NULL); if (ret < 0) { netdev_err(dev->net, "set LINK LED failed : %d\n", ret); goto out; } /* Get the MAC address */ ret = sr_read_cmd(dev, SR_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, addr); if (ret < 0) { netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret); return ret; } eth_hw_addr_set(dev->net, addr); netdev_dbg(dev->net, "mac addr : %pM\n", dev->net->dev_addr); /* Initialize MII structure */ dev->mii.dev = dev->net; dev->mii.mdio_read = sr_mdio_read; dev->mii.mdio_write = sr_mdio_write; dev->mii.phy_id_mask = 0x1f; dev->mii.reg_num_mask = 0x1f; dev->mii.phy_id = sr_get_phy_addr(dev); dev->net->netdev_ops = &sr9800_netdev_ops; dev->net->ethtool_ops = &sr9800_ethtool_ops; embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 
1 : 0); /* Reset the PHY to normal operation mode */ ret = sr_write_cmd(dev, SR_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL); if (ret < 0) { netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); return ret; } /* Init PHY routine */ ret = sr9800_phy_powerup(dev); if (ret < 0) goto out; rx_ctl = sr_read_rx_ctl(dev); netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl); ret = sr_write_rx_ctl(dev, 0x0000); if (ret < 0) goto out; rx_ctl = sr_read_rx_ctl(dev); netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl); /* Read PHYID register *AFTER* the PHY was reset properly */ phyid = sr_get_phyid(dev); netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid); /* medium mode setting */ ret = sr9800_set_default_mode(dev); if (ret < 0) goto out; if (dev->udev->speed == USB_SPEED_HIGH) { ret = sr_write_cmd(dev, SR_CMD_BULKIN_SIZE, SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_4K].byte_cnt, SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_4K].threshold, 0, NULL); if (ret < 0) { netdev_err(dev->net, "Reset RX_CTL failed: %d\n", ret); goto out; } dev->rx_urb_size = SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_4K].size; } else { ret = sr_write_cmd(dev, SR_CMD_BULKIN_SIZE, SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_2K].byte_cnt, SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_2K].threshold, 0, NULL); if (ret < 0) { netdev_err(dev->net, "Reset RX_CTL failed: %d\n", ret); goto out; } dev->rx_urb_size = SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_2K].size; } netdev_dbg(dev->net, "%s : setting rx_urb_size with : %zu\n", __func__, dev->rx_urb_size); return 0; out: return ret; } static const struct driver_info sr9800_driver_info = { .description = "CoreChip SR9800 USB 2.0 Ethernet", .bind = sr9800_bind, .status = sr_status, .link_reset = sr9800_link_reset, .reset = sr9800_reset, .flags = DRIVER_FLAG, .rx_fixup = sr_rx_fixup, .tx_fixup = sr_tx_fixup, }; static const struct usb_device_id products[] = { { USB_DEVICE(0x0fe6, 0x9800), /* SR9800 Device */ .driver_info = (unsigned long) &sr9800_driver_info, }, {}, /* END */ }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver sr_driver = { .name = DRIVER_NAME, .id_table = products, .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, }; module_usb_driver(sr_driver); MODULE_AUTHOR("Liu Junliang <liujunliang_ljl@163.com"); MODULE_VERSION(DRIVER_VERSION); MODULE_DESCRIPTION("SR9800 USB 2.0 USB2NET Dev : http://www.corechip-sz.com"); MODULE_LICENSE("GPL"); |
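/*
 * Illustrative sketch (not part of the driver above): sr_rx_fixup() expects
 * each packet in the bulk-in stream to be preceded by a 32-bit little-endian
 * header whose low 11 bits carry the packet length and whose bits 16..26
 * carry the bitwise complement of that length. The hypothetical helpers
 * below mirror that framing and the validity check used by the driver.
 */
#include <stdbool.h>
#include <stdint.h>

static inline uint32_t sr9800_rx_header_pack(uint16_t len)
{
	/* length in bits 0..10, complemented length in bits 16..26 */
	return (len & 0x7ffu) | (((uint32_t)~len & 0x7ffu) << 16);
}

static inline bool sr9800_rx_header_valid(uint32_t header)
{
	uint16_t len = header & 0x7ffu;

	/* same test as sr_rx_fixup(): size == ((~header >> 16) & 0x07ff) */
	return len == ((~header >> 16) & 0x7ffu);
}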
/* * linux/fs/nls/nls_cp861.c * * Charset cp861 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00d0, 0x00f0, 0x00de, 0x00c4, 0x00c5, /* 0x90*/ 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00fe, 0x00fb, 0x00dd, 0x00fd, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x20a7, 0x0192, /* 0xa0*/ 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00c1, 0x00cd, 0x00d3, 0x00da, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534,
0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, /* 0xf0*/ 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0xad, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0xae, 0xaa, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0xaf, 0xac, 0xab, 0x00, 0xa8, /* 0xb8-0xbf */ 0x00, 0xa4, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, /* 0xc0-0xc7 */ 0x00, 0x90, 0x00, 0x00, 0x00, 0xa5, 0x00, 0x00, /* 0xc8-0xcf */ 0x8b, 0x00, 0x00, 0xa6, 0x00, 0x00, 0x99, 0x00, /* 0xd0-0xd7 */ 0x9d, 0x00, 0xa7, 0x00, 0x9a, 0x97, 0x8d, 0xe1, /* 0xd8-0xdf */ 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, 0x87, /* 0xe0-0xe7 */ 0x8a, 0x82, 0x88, 0x89, 0x00, 0xa1, 0x00, 0x00, /* 0xe8-0xef */ 0x8c, 0x00, 0x00, 0xa2, 0x93, 0x00, 0x94, 0xf6, /* 0xf0-0xf7 */ 0x9b, 0x00, 0xa3, 0x96, 0x81, 0x98, 0x95, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 
0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0xe9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0xe8, 0x00, /* 0xa0-0xa7 */ 0x00, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0xe0, 0x00, 0x00, 0xeb, 0xee, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0xe3, 0x00, 0x00, 0xe5, 0xe7, 0x00, 0xed, 0x00, /* 0xc0-0xc7 */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, /* 0xa0-0xa7 */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0xec, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0xf0, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page23[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0xf4, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, page23, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 
0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x87, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8c, 0x8c, 0x95, 0x84, 0x86, /* 0x88-0x8f */ 0x82, 0x91, 0x91, 0x93, 0x94, 0x95, 0x96, 0x98, /* 0x90-0x97 */ 0x98, 0x94, 0x81, 0x9b, 0x9c, 0x9b, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa0, 0xa1, 0xa2, 0xa3, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0x00, 0xe3, 0xe5, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xed, 0x00, 0x00, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x9a, 0x90, 0x00, 0x8e, 0x00, 0x8f, 0x80, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x8b, 0x8b, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x92, 0x92, 0x00, 0x99, 0x8d, 0x00, 0x97, /* 0x90-0x97 */ 0x97, 0x99, 0x9a, 0x9d, 0x9c, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0xa4, 0xa5, 0xa6, 0xa7, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x00, 0xe1, 0xe2, 0x00, 0xe4, 0xe4, 0x00, 0x00, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0x00, 0xec, 0xe8, 0x00, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; 
unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp861", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp861(void) { return register_nls(&table); } static void __exit exit_nls_cp861(void) { unregister_nls(&table); } module_init(init_nls_cp861) module_exit(exit_nls_cp861) MODULE_DESCRIPTION("NLS Codepage 861 (Icelandic)"); MODULE_LICENSE("Dual BSD/GPL"); |
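/*
 * Illustrative sketch (not part of the module above): uni2char() resolves a
 * 16-bit code point through a two-level table, using the high byte to pick a
 * page from page_uni2charset[] and the low byte to index into that page; for
 * example U+00E9 resolves through page00[0xe9] to the cp861 byte 0x82. The
 * hypothetical stand-alone helper below mirrors that lookup, treating a
 * missing page or a zero entry as "no exact mapping".
 */
static int cp861_lookup(const unsigned char *const pages[256],
			unsigned short uni, unsigned char *out)
{
	const unsigned char *page = pages[(uni >> 8) & 0xff];

	if (!page || !page[uni & 0xff])
		return -1;	/* no exact mapping for this code point */

	*out = page[uni & 0xff];
	return 0;
}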
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/configfs.h> #include "tcp.h" #include "nodemanager.h" #include "heartbeat.h" #include "masklog.h" #include "sys.h" /* for now we operate under the assertion that there can be only one * cluster active at a time. Changing this will require trickling * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; static const char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { "reset", /* O2NM_FENCE_RESET */ "panic", /* O2NM_FENCE_PANIC */ }; static inline void o2nm_lock_subsystem(void); static inline void o2nm_unlock_subsystem(void); struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { struct o2nm_node *node = NULL; if (node_num >= O2NM_MAX_NODES || o2nm_single_cluster == NULL) goto out; read_lock(&o2nm_single_cluster->cl_nodes_lock); node = o2nm_single_cluster->cl_nodes[node_num]; if (node) config_item_get(&node->nd_item); read_unlock(&o2nm_single_cluster->cl_nodes_lock); out: return node; } EXPORT_SYMBOL_GPL(o2nm_get_node_by_num); int o2nm_configured_node_map(unsigned long *map, unsigned bytes) { struct o2nm_cluster *cluster = o2nm_single_cluster; BUG_ON(bytes < (sizeof(cluster->cl_nodes_bitmap))); if (cluster == NULL) return -EINVAL; read_lock(&cluster->cl_nodes_lock); bitmap_copy(map, cluster->cl_nodes_bitmap, O2NM_MAX_NODES); read_unlock(&cluster->cl_nodes_lock); return 0; } EXPORT_SYMBOL_GPL(o2nm_configured_node_map); static struct o2nm_node *o2nm_node_ip_tree_lookup(struct o2nm_cluster *cluster, __be32 ip_needle, struct rb_node ***ret_p, struct rb_node **ret_parent) { struct rb_node **p = &cluster->cl_node_ip_tree.rb_node; struct rb_node *parent = NULL; struct o2nm_node *node, *ret = NULL; while (*p) { int cmp; parent = *p; node = rb_entry(parent, struct o2nm_node, nd_ip_node); cmp = memcmp(&ip_needle, &node->nd_ipv4_address, sizeof(ip_needle)); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) p = &(*p)->rb_right; else { ret = node; break; } } if (ret_p != NULL) *ret_p = p; if (ret_parent != NULL) *ret_parent = parent; return ret; } struct o2nm_node *o2nm_get_node_by_ip(__be32 addr) { struct o2nm_node *node = NULL; struct o2nm_cluster *cluster = o2nm_single_cluster; if (cluster == NULL) goto out; read_lock(&cluster->cl_nodes_lock); node = o2nm_node_ip_tree_lookup(cluster, addr, NULL, NULL); if (node) config_item_get(&node->nd_item); read_unlock(&cluster->cl_nodes_lock); out: return node; } EXPORT_SYMBOL_GPL(o2nm_get_node_by_ip); void o2nm_node_put(struct o2nm_node *node) { config_item_put(&node->nd_item); } EXPORT_SYMBOL_GPL(o2nm_node_put); void o2nm_node_get(struct o2nm_node *node) { config_item_get(&node->nd_item); } EXPORT_SYMBOL_GPL(o2nm_node_get); u8 o2nm_this_node(void) { u8 node_num = O2NM_MAX_NODES; if (o2nm_single_cluster && o2nm_single_cluster->cl_has_local) node_num = o2nm_single_cluster->cl_local_node; return node_num; } EXPORT_SYMBOL_GPL(o2nm_this_node); /* node configfs bits */ static struct o2nm_cluster *to_o2nm_cluster(struct config_item *item) { return item ? container_of(to_config_group(item), struct o2nm_cluster, cl_group) : NULL; } static struct o2nm_node *to_o2nm_node(struct config_item *item) { return item ? 
container_of(item, struct o2nm_node, nd_item) : NULL; } static void o2nm_node_release(struct config_item *item) { struct o2nm_node *node = to_o2nm_node(item); kfree(node); } static ssize_t o2nm_node_num_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_o2nm_node(item)->nd_num); } static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node) { /* through the first node_set .parent * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */ if (node->nd_item.ci_parent) return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent); else return NULL; } enum { O2NM_NODE_ATTR_NUM = 0, O2NM_NODE_ATTR_PORT, O2NM_NODE_ATTR_ADDRESS, }; static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster; unsigned long tmp; char *p = (char *)page; int ret = 0; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; if (tmp >= O2NM_MAX_NODES) return -ERANGE; /* once we're in the cl_nodes tree networking can look us up by * node number and try to use our address and port attributes * to connect to this node.. make sure that they've been set * before writing the node attribute? */ if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) || !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EINVAL; /* XXX */ o2nm_lock_subsystem(); cluster = to_o2nm_cluster_from_node(node); if (!cluster) { o2nm_unlock_subsystem(); return -EINVAL; } write_lock(&cluster->cl_nodes_lock); if (cluster->cl_nodes[tmp]) ret = -EEXIST; else if (test_and_set_bit(O2NM_NODE_ATTR_NUM, &node->nd_set_attributes)) ret = -EBUSY; else { cluster->cl_nodes[tmp] = node; node->nd_num = tmp; set_bit(tmp, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); o2nm_unlock_subsystem(); if (ret) return ret; return count; } static ssize_t o2nm_node_ipv4_port_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", ntohs(to_o2nm_node(item)->nd_ipv4_port)); } static ssize_t o2nm_node_ipv4_port_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); unsigned long tmp; char *p = (char *)page; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; if (tmp == 0) return -EINVAL; if (tmp >= (u16)-1) return -ERANGE; if (test_and_set_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EBUSY; node->nd_ipv4_port = htons(tmp); return count; } static ssize_t o2nm_node_ipv4_address_show(struct config_item *item, char *page) { return sprintf(page, "%pI4\n", &to_o2nm_node(item)->nd_ipv4_address); } static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster; int ret, i; struct rb_node **p, *parent; unsigned int octets[4]; __be32 ipv4_addr = 0; ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[3], &octets[2], &octets[1], &octets[0]); if (ret != 4) return -EINVAL; for (i = 0; i < ARRAY_SIZE(octets); i++) { if (octets[i] > 255) return -ERANGE; be32_add_cpu(&ipv4_addr, octets[i] << (i * 8)); } o2nm_lock_subsystem(); cluster = to_o2nm_cluster_from_node(node); if (!cluster) { o2nm_unlock_subsystem(); return -EINVAL; } ret = 0; write_lock(&cluster->cl_nodes_lock); if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) ret = -EEXIST; else if (test_and_set_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes)) ret = -EBUSY; else { 
rb_link_node(&node->nd_ip_node, parent, p); rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); } write_unlock(&cluster->cl_nodes_lock); o2nm_unlock_subsystem(); if (ret) return ret; memcpy(&node->nd_ipv4_address, &ipv4_addr, sizeof(ipv4_addr)); return count; } static ssize_t o2nm_node_local_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_o2nm_node(item)->nd_local); } static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster; unsigned long tmp; char *p = (char *)page; ssize_t ret; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; tmp = !!tmp; /* boolean of whether this node wants to be local */ /* setting local turns on networking rx for now so we require having * set everything else first */ if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) || !test_bit(O2NM_NODE_ATTR_NUM, &node->nd_set_attributes) || !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EINVAL; /* XXX */ o2nm_lock_subsystem(); cluster = to_o2nm_cluster_from_node(node); if (!cluster) { ret = -EINVAL; goto out; } /* the only failure case is trying to set a new local node * when a different one is already set */ if (tmp && tmp == cluster->cl_has_local && cluster->cl_local_node != node->nd_num) { ret = -EBUSY; goto out; } /* bring up the rx thread if we're setting the new local node. */ if (tmp && !cluster->cl_has_local) { ret = o2net_start_listening(node); if (ret) goto out; } if (!tmp && cluster->cl_has_local && cluster->cl_local_node == node->nd_num) { o2net_stop_listening(node); cluster->cl_local_node = O2NM_INVALID_NODE_NUM; } node->nd_local = tmp; if (node->nd_local) { cluster->cl_has_local = tmp; cluster->cl_local_node = node->nd_num; } ret = count; out: o2nm_unlock_subsystem(); return ret; } CONFIGFS_ATTR(o2nm_node_, num); CONFIGFS_ATTR(o2nm_node_, ipv4_port); CONFIGFS_ATTR(o2nm_node_, ipv4_address); CONFIGFS_ATTR(o2nm_node_, local); static struct configfs_attribute *o2nm_node_attrs[] = { &o2nm_node_attr_num, &o2nm_node_attr_ipv4_port, &o2nm_node_attr_ipv4_address, &o2nm_node_attr_local, NULL, }; static struct configfs_item_operations o2nm_node_item_ops = { .release = o2nm_node_release, }; static const struct config_item_type o2nm_node_type = { .ct_item_ops = &o2nm_node_item_ops, .ct_attrs = o2nm_node_attrs, .ct_owner = THIS_MODULE, }; /* node set */ struct o2nm_node_group { struct config_group ns_group; /* some stuff? */ }; #if 0 static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group) { return group ? 
container_of(group, struct o2nm_node_group, ns_group) : NULL; } #endif static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, unsigned int *val) { unsigned long tmp; char *p = (char *)page; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) return -EINVAL; if (tmp == 0) return -EINVAL; if (tmp >= (u32)-1) return -ERANGE; *val = tmp; return count; } static ssize_t o2nm_cluster_idle_timeout_ms_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_idle_timeout_ms); } static ssize_t o2nm_cluster_idle_timeout_ms_store(struct config_item *item, const char *page, size_t count) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { if (cluster->cl_idle_timeout_ms != val && o2net_num_connected_peers()) { mlog(ML_NOTICE, "o2net: cannot change idle timeout after " "the first peer has agreed to it." " %d connected peers\n", o2net_num_connected_peers()); ret = -EINVAL; } else if (val <= cluster->cl_keepalive_delay_ms) { mlog(ML_NOTICE, "o2net: idle timeout must be larger " "than keepalive delay\n"); ret = -EINVAL; } else { cluster->cl_idle_timeout_ms = val; } } return ret; } static ssize_t o2nm_cluster_keepalive_delay_ms_show( struct config_item *item, char *page) { return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_keepalive_delay_ms); } static ssize_t o2nm_cluster_keepalive_delay_ms_store( struct config_item *item, const char *page, size_t count) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { if (cluster->cl_keepalive_delay_ms != val && o2net_num_connected_peers()) { mlog(ML_NOTICE, "o2net: cannot change keepalive delay after" " the first peer has agreed to it." 
" %d connected peers\n", o2net_num_connected_peers()); ret = -EINVAL; } else if (val >= cluster->cl_idle_timeout_ms) { mlog(ML_NOTICE, "o2net: keepalive delay must be " "smaller than idle timeout\n"); ret = -EINVAL; } else { cluster->cl_keepalive_delay_ms = val; } } return ret; } static ssize_t o2nm_cluster_reconnect_delay_ms_show( struct config_item *item, char *page) { return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_reconnect_delay_ms); } static ssize_t o2nm_cluster_reconnect_delay_ms_store( struct config_item *item, const char *page, size_t count) { return o2nm_cluster_attr_write(page, count, &to_o2nm_cluster(item)->cl_reconnect_delay_ms); } static ssize_t o2nm_cluster_fence_method_show( struct config_item *item, char *page) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret = 0; if (cluster) ret = sprintf(page, "%s\n", o2nm_fence_method_desc[cluster->cl_fence_method]); return ret; } static ssize_t o2nm_cluster_fence_method_store( struct config_item *item, const char *page, size_t count) { unsigned int i; if (page[count - 1] != '\n') goto bail; for (i = 0; i < O2NM_FENCE_METHODS; ++i) { if (count != strlen(o2nm_fence_method_desc[i]) + 1) continue; if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) continue; if (to_o2nm_cluster(item)->cl_fence_method != i) { printk(KERN_INFO "ocfs2: Changing fence method to %s\n", o2nm_fence_method_desc[i]); to_o2nm_cluster(item)->cl_fence_method = i; } return count; } bail: return -EINVAL; } CONFIGFS_ATTR(o2nm_cluster_, idle_timeout_ms); CONFIGFS_ATTR(o2nm_cluster_, keepalive_delay_ms); CONFIGFS_ATTR(o2nm_cluster_, reconnect_delay_ms); CONFIGFS_ATTR(o2nm_cluster_, fence_method); static struct configfs_attribute *o2nm_cluster_attrs[] = { &o2nm_cluster_attr_idle_timeout_ms, &o2nm_cluster_attr_keepalive_delay_ms, &o2nm_cluster_attr_reconnect_delay_ms, &o2nm_cluster_attr_fence_method, NULL, }; static struct config_item *o2nm_node_group_make_item(struct config_group *group, const char *name) { struct o2nm_node *node = NULL; if (strlen(name) > O2NM_MAX_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL); if (node == NULL) return ERR_PTR(-ENOMEM); strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */ config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); spin_lock_init(&node->nd_lock); mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); return &node->nd_item; } static void o2nm_node_group_drop_item(struct config_group *group, struct config_item *item) { struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster(group->cg_item.ci_parent); if (cluster->cl_nodes[node->nd_num] == node) { o2net_disconnect_node(node); if (cluster->cl_has_local && (cluster->cl_local_node == node->nd_num)) { cluster->cl_has_local = 0; cluster->cl_local_node = O2NM_INVALID_NODE_NUM; o2net_stop_listening(node); } } /* XXX call into net to stop this node from trading messages */ write_lock(&cluster->cl_nodes_lock); /* XXX sloppy */ if (node->nd_ipv4_address) rb_erase(&node->nd_ip_node, &cluster->cl_node_ip_tree); /* nd_num might be 0 if the node number hasn't been set.. 
*/ if (cluster->cl_nodes[node->nd_num] == node) { cluster->cl_nodes[node->nd_num] = NULL; clear_bit(node->nd_num, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", config_item_name(&node->nd_item)); config_item_put(item); } static struct configfs_group_operations o2nm_node_group_group_ops = { .make_item = o2nm_node_group_make_item, .drop_item = o2nm_node_group_drop_item, }; static const struct config_item_type o2nm_node_group_type = { .ct_group_ops = &o2nm_node_group_group_ops, .ct_owner = THIS_MODULE, }; /* cluster */ static void o2nm_cluster_release(struct config_item *item) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); kfree(cluster); } static struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, }; static const struct config_item_type o2nm_cluster_type = { .ct_item_ops = &o2nm_cluster_item_ops, .ct_attrs = o2nm_cluster_attrs, .ct_owner = THIS_MODULE, }; /* cluster set */ struct o2nm_cluster_group { struct configfs_subsystem cs_subsys; /* some stuff? */ }; #if 0 static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *group) { return group ? container_of(to_configfs_subsystem(group), struct o2nm_cluster_group, cs_subsys) : NULL; } #endif static struct config_group *o2nm_cluster_group_make_group(struct config_group *group, const char *name) { struct o2nm_cluster *cluster = NULL; struct o2nm_node_group *ns = NULL; struct config_group *o2hb_group = NULL, *ret = NULL; /* this runs under the parent dir's i_rwsem; there can be only * one caller in here at a time */ if (o2nm_single_cluster) return ERR_PTR(-ENOSPC); cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); o2hb_group = o2hb_alloc_hb_set(); if (cluster == NULL || ns == NULL || o2hb_group == NULL) goto out; config_group_init_type_name(&cluster->cl_group, name, &o2nm_cluster_type); configfs_add_default_group(&ns->ns_group, &cluster->cl_group); config_group_init_type_name(&ns->ns_group, "node", &o2nm_node_group_type); configfs_add_default_group(o2hb_group, &cluster->cl_group); rwlock_init(&cluster->cl_nodes_lock); cluster->cl_node_ip_tree = RB_ROOT; cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; cluster->cl_fence_method = O2NM_FENCE_RESET; ret = &cluster->cl_group; o2nm_single_cluster = cluster; out: if (ret == NULL) { kfree(cluster); kfree(ns); o2hb_free_hb_set(o2hb_group); ret = ERR_PTR(-ENOMEM); } return ret; } static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); BUG_ON(o2nm_single_cluster != cluster); o2nm_single_cluster = NULL; configfs_remove_default_groups(&cluster->cl_group); config_item_put(item); } static struct configfs_group_operations o2nm_cluster_group_group_ops = { .make_group = o2nm_cluster_group_make_group, .drop_item = o2nm_cluster_group_drop_item, }; static const struct config_item_type o2nm_cluster_group_type = { .ct_group_ops = &o2nm_cluster_group_group_ops, .ct_owner = THIS_MODULE, }; static struct o2nm_cluster_group o2nm_cluster_group = { .cs_subsys = { .su_group = { .cg_item = { .ci_namebuf = "cluster", .ci_type = &o2nm_cluster_group_type, }, }, }, }; static inline void o2nm_lock_subsystem(void) { mutex_lock(&o2nm_cluster_group.cs_subsys.su_mutex); } static inline void 
o2nm_unlock_subsystem(void) { mutex_unlock(&o2nm_cluster_group.cs_subsys.su_mutex); } int o2nm_depend_item(struct config_item *item) { return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); } void o2nm_undepend_item(struct config_item *item) { configfs_undepend_item(item); } int o2nm_depend_this_node(void) { int ret = 0; struct o2nm_node *local_node; local_node = o2nm_get_node_by_num(o2nm_this_node()); if (!local_node) { ret = -EINVAL; goto out; } ret = o2nm_depend_item(&local_node->nd_item); o2nm_node_put(local_node); out: return ret; } void o2nm_undepend_this_node(void) { struct o2nm_node *local_node; local_node = o2nm_get_node_by_num(o2nm_this_node()); BUG_ON(!local_node); o2nm_undepend_item(&local_node->nd_item); o2nm_node_put(local_node); } static void __exit exit_o2nm(void) { /* XXX sync with hb callbacks and shut down hb? */ o2net_unregister_hb_callbacks(); configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); o2cb_sys_shutdown(); o2net_exit(); o2hb_exit(); } static int __init init_o2nm(void) { int ret; o2hb_init(); ret = o2net_init(); if (ret) goto out_o2hb; ret = o2net_register_hb_callbacks(); if (ret) goto out_o2net; config_group_init(&o2nm_cluster_group.cs_subsys.su_group); mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); if (ret) { printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); goto out_callbacks; } ret = o2cb_sys_init(); if (!ret) goto out; configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); out_callbacks: o2net_unregister_hb_callbacks(); out_o2net: o2net_exit(); out_o2hb: o2hb_exit(); out: return ret; } MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("OCFS2 cluster management"); module_init(init_o2nm) module_exit(exit_o2nm) |
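/*
 * Illustrative sketch (not part of this file): how another ocfs2 component
 * might pin the local node's configfs item around a critical section so the
 * node cannot be rmdir'ed out of the cluster/<name>/node directory while it
 * is in use.  The helper name and the "work" are hypothetical; only
 * o2nm_depend_this_node() and o2nm_undepend_this_node() come from the
 * interface above (the former fails with -EINVAL if no local node has been
 * configured yet).
 */
static int example_do_work_with_pinned_local_node(void)
{
	int ret;

	/* take a configfs dependency on the local node item */
	ret = o2nm_depend_this_node();
	if (ret)
		return ret;

	/* ... do work that requires the local node to stay configured ... */

	/* drop the dependency so removal of the node item can proceed again */
	o2nm_undepend_this_node();
	return 0;
}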
1 3 3 4 4 3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Bridge per vlan tunnel port dst_metadata netlink control interface * * Authors: * Roopa Prabhu <roopa@cumulusnetworks.com> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/etherdevice.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> #include <uapi/linux/if_bridge.h> #include <net/dst_metadata.h> #include "br_private.h" #include "br_private_tunnel.h" static size_t __get_vlan_tinfo_size(void) { return nla_total_size(0) + /* nest IFLA_BRIDGE_VLAN_TUNNEL_INFO */ nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_VLAN_TUNNEL_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BRIDGE_VLAN_TUNNEL_VID */ nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ } bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *v_last) { __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id); __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id); return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_last)) == 1; } static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg) { struct net_bridge_vlan *v, *vtbegin = NULL, *vtend = NULL; int num_tinfos = 0; /* Count number of vlan infos */ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { /* only a context, bridge vlan not activated */ if (!br_vlan_should_use(v) || !v->tinfo.tunnel_id) continue; if (!vtbegin) { goto initvars; } else if ((v->vid - vtend->vid) == 1 && vlan_tunid_inrange(v, vtend)) { vtend = v; continue; } else { if ((vtend->vid - vtbegin->vid) > 0) num_tinfos += 2; else num_tinfos += 1; } initvars: vtbegin = v; vtend = v; } if (vtbegin && vtend) { if ((vtend->vid - vtbegin->vid) > 0) num_tinfos += 2; else num_tinfos += 1; } return num_tinfos; } int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg) { int num_tinfos; if (!vg) return 0; rcu_read_lock(); num_tinfos = __get_num_vlan_tunnel_infos(vg); rcu_read_unlock(); return num_tinfos * __get_vlan_tinfo_size(); } static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid, __be64 tunnel_id, u16 flags) { __be32 tid = tunnel_id_to_key32(tunnel_id); struct nlattr *tmap; tmap = nla_nest_start_noflag(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); if (!tmap) return -EMSGSIZE; if 
(nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID, be32_to_cpu(tid))) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_VID, vid)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, flags)) goto nla_put_failure; nla_nest_end(skb, tmap); return 0; nla_put_failure: nla_nest_cancel(skb, tmap); return -EMSGSIZE; } static int br_fill_vlan_tinfo_range(struct sk_buff *skb, struct net_bridge_vlan *vtbegin, struct net_bridge_vlan *vtend) { int err; if (vtend && (vtend->vid - vtbegin->vid) > 0) { /* add range to skb */ err = br_fill_vlan_tinfo(skb, vtbegin->vid, vtbegin->tinfo.tunnel_id, BRIDGE_VLAN_INFO_RANGE_BEGIN); if (err) return err; err = br_fill_vlan_tinfo(skb, vtend->vid, vtend->tinfo.tunnel_id, BRIDGE_VLAN_INFO_RANGE_END); if (err) return err; } else { err = br_fill_vlan_tinfo(skb, vtbegin->vid, vtbegin->tinfo.tunnel_id, 0); if (err) return err; } return 0; } int br_fill_vlan_tunnel_info(struct sk_buff *skb, struct net_bridge_vlan_group *vg) { struct net_bridge_vlan *vtbegin = NULL; struct net_bridge_vlan *vtend = NULL; struct net_bridge_vlan *v; int err; /* Count number of vlan infos */ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { /* only a context, bridge vlan not activated */ if (!br_vlan_should_use(v)) continue; if (!v->tinfo.tunnel_dst) continue; if (!vtbegin) { goto initvars; } else if ((v->vid - vtend->vid) == 1 && vlan_tunid_inrange(v, vtend)) { vtend = v; continue; } else { err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); if (err) return err; } initvars: vtbegin = v; vtend = v; } if (vtbegin) { err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); if (err) return err; } return 0; } static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = { [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = { .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1 }, [IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 }, [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 }, }; int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, u16 vid, u32 tun_id, bool *changed) { int err = 0; if (!p) return -EINVAL; switch (cmd) { case RTM_SETLINK: err = nbp_vlan_tunnel_info_add(p, vid, tun_id); if (!err) *changed = true; break; case RTM_DELLINK: if (!nbp_vlan_tunnel_info_delete(p, vid)) *changed = true; break; } return err; } int br_parse_vlan_tunnel_info(struct nlattr *attr, struct vtunnel_info *tinfo) { struct nlattr *tb[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1]; u32 tun_id; u16 vid, flags = 0; int err; memset(tinfo, 0, sizeof(*tinfo)); err = nla_parse_nested_deprecated(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr, vlan_tunnel_policy, NULL); if (err < 0) return err; if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) return -EINVAL; tun_id = nla_get_u32(tb[IFLA_BRIDGE_VLAN_TUNNEL_ID]); vid = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]); if (vid >= VLAN_VID_MASK) return -ERANGE; if (tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]) flags = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]); tinfo->tunid = tun_id; tinfo->vid = vid; tinfo->flags = flags; return 0; } /* send a notification if v_curr can't enter the range and start a new one */ static void __vlan_tunnel_handle_range(const struct net_bridge_port *p, struct net_bridge_vlan **v_start, struct net_bridge_vlan **v_end, int v_curr, bool curr_change) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; vg = nbp_vlan_group(p); if (!vg) return; v = br_vlan_find(vg, v_curr); if (!*v_start) goto out_init; if (v && curr_change && 
br_vlan_can_enter_range(v, *v_end)) { *v_end = v; return; } br_vlan_notify(p->br, p, (*v_start)->vid, (*v_end)->vid, RTM_NEWVLAN); out_init: /* we start a range only if there are any changes to notify about */ *v_start = curr_change ? v : NULL; *v_end = *v_start; } int br_process_vlan_tunnel_info(const struct net_bridge *br, const struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, struct vtunnel_info *tinfo_last, bool *changed) { int err; if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) return -EINVAL; memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info)); } else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) { struct net_bridge_vlan *v_start = NULL, *v_end = NULL; int t, v; if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN)) return -EINVAL; if ((tinfo_curr->vid - tinfo_last->vid) != (tinfo_curr->tunid - tinfo_last->tunid)) return -EINVAL; t = tinfo_last->tunid; for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { bool curr_change = false; err = br_vlan_tunnel_info(p, cmd, v, t, &curr_change); if (err) break; t++; if (curr_change) *changed = curr_change; __vlan_tunnel_handle_range(p, &v_start, &v_end, v, curr_change); } if (v_start && v_end) br_vlan_notify(br, p, v_start->vid, v_end->vid, RTM_NEWVLAN); if (err) return err; memset(tinfo_last, 0, sizeof(struct vtunnel_info)); memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); } else { if (tinfo_last->flags) return -EINVAL; err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, tinfo_curr->tunid, changed); if (err) return err; br_vlan_notify(br, p, tinfo_curr->vid, 0, RTM_NEWVLAN); memset(tinfo_last, 0, sizeof(struct vtunnel_info)); memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); } return 0; } |
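/*
 * Illustrative sketch (not part of this file): how the range handling in
 * br_process_vlan_tunnel_info() above is driven.  VIDs 10..20 are mapped to
 * tunnel ids 1000..1010; the two spans must be equal, which is exactly what
 * the (vid delta == tunid delta) check enforces before the per-VID loop
 * installs one mapping per VLAN.  The function name below is hypothetical;
 * struct vtunnel_info, br_process_vlan_tunnel_info() and the
 * BRIDGE_VLAN_INFO_RANGE_* flags are taken from the code above.
 */
static int example_map_vid_range_to_tunnels(const struct net_bridge *br,
					    const struct net_bridge_port *p)
{
	struct vtunnel_info last = {}, curr = {};
	bool changed = false;
	int err;

	/* open the range: vid 10 <-> tunnel id 1000 */
	curr.vid = 10;
	curr.tunid = 1000;
	curr.flags = BRIDGE_VLAN_INFO_RANGE_BEGIN;
	err = br_process_vlan_tunnel_info(br, p, RTM_SETLINK, &curr, &last,
					  &changed);
	if (err)
		return err;

	/* close the range: vid 20 <-> tunnel id 1010 (same span of 10) */
	curr.vid = 20;
	curr.tunid = 1010;
	curr.flags = BRIDGE_VLAN_INFO_RANGE_END;
	return br_process_vlan_tunnel_info(br, p, RTM_SETLINK, &curr, &last,
					   &changed);
}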
16 2 17 17 19 19 19 19 19 19 1 1 1 1 1 1 1 1 1 1 1 1 19 3 16 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 | // SPDX-License-Identifier: GPL-2.0-only /* * acl.c * * Copyright (C) 2004, 2008 Oracle. All rights reserved. * * CREDITS: * Lots of code in this file is copy from linux/fs/ext3/acl.c. * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> */ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <cluster/masklog.h> #include "ocfs2.h" #include "alloc.h" #include "dlmglue.h" #include "file.h" #include "inode.h" #include "journal.h" #include "ocfs2_fs.h" #include "xattr.h" #include "acl.h" /* * Convert from xattr value to acl struct. */ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) { int n, count; struct posix_acl *acl; if (!value) return NULL; if (size < sizeof(struct posix_acl_entry)) return ERR_PTR(-EINVAL); count = size / sizeof(struct posix_acl_entry); acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); for (n = 0; n < count; n++) { struct ocfs2_acl_entry *entry = (struct ocfs2_acl_entry *)value; acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); switch(acl->a_entries[n].e_tag) { case ACL_USER: acl->a_entries[n].e_uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); break; case ACL_GROUP: acl->a_entries[n].e_gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); break; default: break; } value += sizeof(struct posix_acl_entry); } return acl; } /* * Convert acl struct to xattr value. 
*/ static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size) { struct ocfs2_acl_entry *entry = NULL; char *ocfs2_acl; size_t n; *size = acl->a_count * sizeof(struct posix_acl_entry); ocfs2_acl = kmalloc(*size, GFP_NOFS); if (!ocfs2_acl) return ERR_PTR(-ENOMEM); entry = (struct ocfs2_acl_entry *)ocfs2_acl; for (n = 0; n < acl->a_count; n++, entry++) { entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); switch(acl->a_entries[n].e_tag) { case ACL_USER: entry->e_id = cpu_to_le32( from_kuid(&init_user_ns, acl->a_entries[n].e_uid)); break; case ACL_GROUP: entry->e_id = cpu_to_le32( from_kgid(&init_user_ns, acl->a_entries[n].e_gid)); break; default: entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); break; } } return ocfs2_acl; } static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode, int type, struct buffer_head *di_bh) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; switch (type) { case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; break; default: return ERR_PTR(-EINVAL); } retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0); if (retval > 0) { value = kmalloc(retval, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", value, retval); } if (retval > 0) acl = ocfs2_acl_from_xattr(value, retval); else if (retval == -ENODATA || retval == 0) acl = NULL; else acl = ERR_PTR(retval); kfree(value); return acl; } /* * Helper function to set i_mode in memory and disk. Some call paths * will not have di_bh or a journal handle to pass, in which case it * will create it's own. */ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, handle_t *handle, umode_t new_mode) { int ret, commit_handle = 0; struct ocfs2_dinode *di; if (di_bh == NULL) { ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; } } else get_bh(di_bh); if (handle == NULL) { handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out_brelse; } commit_handle = 1; } di = (struct ocfs2_dinode *)di_bh->b_data; ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } inode->i_mode = new_mode; inode_set_ctime_current(inode); di->i_mode = cpu_to_le16(inode->i_mode); di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, di_bh); out_commit: if (commit_handle) ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out_brelse: brelse(di_bh); out: return ret; } /* * Set the access or default ACL of an inode. */ static int ocfs2_set_acl(handle_t *handle, struct inode *inode, struct buffer_head *di_bh, int type, struct posix_acl *acl, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac) { int name_index; void *value = NULL; size_t size = 0; int ret; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; switch (type) { case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; if (!S_ISDIR(inode->i_mode)) return acl ? 
-EACCES : 0; break; default: return -EINVAL; } if (acl) { value = ocfs2_acl_to_xattr(acl, &size); if (IS_ERR(value)) return (int)PTR_ERR(value); } if (handle) ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index, "", value, size, 0, meta_ac, data_ac); else ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0); kfree(value); if (!ret) set_cached_acl(inode, type, acl); return ret; } int ocfs2_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct buffer_head *bh = NULL; int status, had_lock; struct ocfs2_lock_holder oh; struct inode *inode = d_inode(dentry); had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); if (had_lock < 0) return had_lock; if (type == ACL_TYPE_ACCESS && acl) { umode_t mode; status = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (status) goto unlock; status = ocfs2_acl_set_mode(inode, bh, NULL, mode); if (status) goto unlock; } status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL); unlock: ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; } struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu) { struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; int had_lock; struct ocfs2_lock_holder oh; if (rcu) return ERR_PTR(-ECHILD); osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return NULL; had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); if (had_lock < 0) return ERR_PTR(had_lock); down_read(&OCFS2_I(inode)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(inode, type, di_bh); up_read(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); return acl; } int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl; int ret; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return 0; down_read(&OCFS2_I(inode)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); up_read(&OCFS2_I(inode)->ip_xattr_sem); if (IS_ERR_OR_NULL(acl)) return PTR_ERR_OR_ZERO(acl); ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); if (ret) return ret; ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, acl, NULL, NULL); posix_acl_release(acl); return ret; } /* * Initialize the ACLs of a new inode. If parent directory has default ACL, * then clone to new inode. Called from ocfs2_mknod. 
*/ int ocfs2_init_acl(handle_t *handle, struct inode *inode, struct inode *dir, struct buffer_head *di_bh, struct buffer_head *dir_bh, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0, ret2; umode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { down_read(&OCFS2_I(dir)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, dir_bh); up_read(&OCFS2_I(dir)->ip_xattr_sem); if (IS_ERR(acl)) return PTR_ERR(acl); } if (!acl) { mode = inode->i_mode & ~current_umask(); ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); if (ret) { mlog_errno(ret); goto cleanup; } } } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { if (S_ISDIR(inode->i_mode)) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_DEFAULT, acl, meta_ac, data_ac); if (ret) goto cleanup; } mode = inode->i_mode; ret = __posix_acl_create(&acl, GFP_NOFS, &mode); if (ret < 0) return ret; ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); if (ret2) { mlog_errno(ret2); ret = ret2; goto cleanup; } if (ret > 0) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_ACCESS, acl, meta_ac, data_ac); } } cleanup: posix_acl_release(acl); return ret; } |
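/*
 * Illustrative sketch (not part of this file): the expected calling pattern
 * for ocfs2_acl_chmod() once an inode's mode has been changed, roughly as
 * the setattr path does it.  The wrapper name is hypothetical; only
 * ocfs2_acl_chmod() comes from the code above, and it simply returns 0 when
 * the filesystem is not mounted with POSIX ACL support.
 */
static int example_after_mode_change(struct inode *inode,
				     struct buffer_head *di_bh,
				     umode_t new_mode)
{
	int ret;

	inode->i_mode = new_mode;

	/* rewrite the ACL_TYPE_ACCESS xattr so it agrees with the new mode */
	ret = ocfs2_acl_chmod(inode, di_bh);
	if (ret)
		mlog_errno(ret);

	return ret;
}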
80 81 1 76 381 382 68 68 68 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | // SPDX-License-Identifier: GPL-2.0 #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <net/gro_cells.h> #include <net/hotdata.h> struct gro_cell { struct sk_buff_head napi_skbs; struct napi_struct napi; }; int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct gro_cell *cell; int res; rcu_read_lock(); if (unlikely(!(dev->flags & IFF_UP))) goto drop; if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) { res = netif_rx(skb); goto unlock; } cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) { drop: dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); res = NET_RX_DROP; goto unlock; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); res = NET_RX_SUCCESS; unlock: rcu_read_unlock(); return res; } EXPORT_SYMBOL(gro_cells_receive); /* called under BH context */ static int gro_cell_poll(struct napi_struct *napi, int budget) { struct gro_cell *cell = container_of(napi, struct gro_cell, napi); struct sk_buff *skb; int work_done = 0; while (work_done < budget) { skb = __skb_dequeue(&cell->napi_skbs); if (!skb) break; napi_gro_receive(napi, skb); work_done++; } if (work_done < budget) napi_complete_done(napi, work_done); return work_done; } int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) { int i; gcells->cells = alloc_percpu(struct gro_cell); if (!gcells->cells) return -ENOMEM; for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); netif_napi_add(dev, &cell->napi, gro_cell_poll); napi_enable(&cell->napi); } return 0; } EXPORT_SYMBOL(gro_cells_init); struct percpu_free_defer { struct rcu_head rcu; void __percpu *ptr; }; static void percpu_free_defer_callback(struct rcu_head *head) { struct percpu_free_defer *defer; defer = container_of(head, struct percpu_free_defer, rcu); free_percpu(defer->ptr); kfree(defer); } void gro_cells_destroy(struct gro_cells *gcells) { struct percpu_free_defer *defer; int i; if (!gcells->cells) return; for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); napi_disable(&cell->napi); __netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } /* We need to observe an rcu grace period before freeing ->cells, * because netpoll could access dev->napi_list under rcu protection. * Try hard using call_rcu() instead of synchronize_rcu(), * because we might be called from cleanup_net(), and we * definitely do not want to block this critical task. */ defer = kmalloc(sizeof(*defer), GFP_KERNEL | __GFP_NOWARN); if (likely(defer)) { defer->ptr = gcells->cells; call_rcu(&defer->rcu, percpu_free_defer_callback); } else { /* We do not hold RTNL at this point, synchronize_net() * would not be able to expedite this sync. 
*/ synchronize_rcu_expedited(); free_percpu(gcells->cells); } gcells->cells = NULL; } EXPORT_SYMBOL(gro_cells_destroy); |
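/*
 * Illustrative sketch (not part of this file): the usual way a tunnel
 * driver consumes this API.  The driver embeds a struct gro_cells in its
 * per-device private data, initialises it once the net_device exists, hands
 * decapsulated packets to gro_cells_receive() so they are GRO'd on a
 * per-CPU NAPI context, and tears the cells down before the device goes
 * away.  The driver structure and function names are hypothetical; only the
 * gro_cells_*() calls come from the code above.
 */
struct example_tunnel_priv {
	struct gro_cells gro_cells;
	/* ... other per-device state ... */
};

static int example_tunnel_init(struct net_device *dev)
{
	struct example_tunnel_priv *priv = netdev_priv(dev);

	return gro_cells_init(&priv->gro_cells, dev);
}

static int example_tunnel_rx(struct net_device *dev, struct sk_buff *skb)
{
	struct example_tunnel_priv *priv = netdev_priv(dev);

	skb->dev = dev;
	/* queues the skb on this CPU's cell and schedules its NAPI */
	return gro_cells_receive(&priv->gro_cells, skb);
}

static void example_tunnel_uninit(struct net_device *dev)
{
	struct example_tunnel_priv *priv = netdev_priv(dev);

	gro_cells_destroy(&priv->gro_cells);
}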
// SPDX-License-Identifier: GPL-2.0-only /* * spectrum management * * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <net/cfg80211.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "sta_info.h" #include "wme.h" static bool wbcs_elem_to_chandef(const struct ieee80211_wide_bw_chansw_ie *wbcs_elem, struct cfg80211_chan_def *chandef) { u8 ccfs0 = wbcs_elem->new_center_freq_seg0; u8 ccfs1 = wbcs_elem->new_center_freq_seg1; u32 cf0 = ieee80211_channel_to_frequency(ccfs0, chandef->chan->band); u32 cf1 = ieee80211_channel_to_frequency(ccfs1, chandef->chan->band); switch (wbcs_elem->new_channel_width) { case IEEE80211_VHT_CHANWIDTH_160MHZ: /* deprecated encoding */ chandef->width = NL80211_CHAN_WIDTH_160; chandef->center_freq1 = cf0; break; case IEEE80211_VHT_CHANWIDTH_80P80MHZ: /* deprecated encoding */ chandef->width = NL80211_CHAN_WIDTH_80P80; chandef->center_freq1 = cf0; chandef->center_freq2 = cf1; break; case IEEE80211_VHT_CHANWIDTH_80MHZ: chandef->width = NL80211_CHAN_WIDTH_80; chandef->center_freq1 = cf0; if (ccfs1) { u8 diff = abs(ccfs0 - ccfs1); if (diff == 8) { chandef->width = NL80211_CHAN_WIDTH_160; chandef->center_freq1 = cf1; } else if (diff > 8) { chandef->width = NL80211_CHAN_WIDTH_80P80; chandef->center_freq2 = cf1; } } break; case IEEE80211_VHT_CHANWIDTH_USE_HT: default: /* If the WBCS Element is present, new channel bandwidth is * at least 40 MHz. */ chandef->width = NL80211_CHAN_WIDTH_40; chandef->center_freq1 = cf0; break; } return cfg80211_chandef_valid(chandef); } static void validate_chandef_by_ht_vht_oper(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, u32 vht_cap_info, struct cfg80211_chan_def *chandef) { u32 control_freq, center_freq1, center_freq2; enum nl80211_chan_width chan_width; struct ieee80211_ht_operation ht_oper; struct ieee80211_vht_operation vht_oper; if (conn->mode < IEEE80211_CONN_MODE_HT || conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { chandef->chan = NULL; return; } control_freq = chandef->chan->center_freq; center_freq1 = chandef->center_freq1; center_freq2 = chandef->center_freq2; chan_width = chandef->width; ht_oper.primary_chan = ieee80211_frequency_to_channel(control_freq); if (control_freq != center_freq1) ht_oper.ht_param = control_freq > center_freq1 ? IEEE80211_HT_PARAM_CHA_SEC_BELOW : IEEE80211_HT_PARAM_CHA_SEC_ABOVE; else ht_oper.ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE; ieee80211_chandef_ht_oper(&ht_oper, chandef); if (conn->mode < IEEE80211_CONN_MODE_VHT) return; vht_oper.center_freq_seg0_idx = ieee80211_frequency_to_channel(center_freq1); vht_oper.center_freq_seg1_idx = center_freq2 ? ieee80211_frequency_to_channel(center_freq2) : 0; switch (chan_width) { case NL80211_CHAN_WIDTH_320: WARN_ON(1); break; case NL80211_CHAN_WIDTH_160: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; vht_oper.center_freq_seg1_idx = vht_oper.center_freq_seg0_idx; vht_oper.center_freq_seg0_idx += control_freq < center_freq1 ? 
-8 : 8; break; case NL80211_CHAN_WIDTH_80P80: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; break; case NL80211_CHAN_WIDTH_80: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; break; default: vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT; break; } ht_oper.operation_mode = le16_encode_bits(vht_oper.center_freq_seg1_idx, IEEE80211_HT_OP_MODE_CCFS2_MASK); if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, &vht_oper, &ht_oper, chandef)) chandef->chan = NULL; } static void validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = sdata->local; u32 control_freq, center_freq1, center_freq2; enum nl80211_chan_width chan_width; struct { struct ieee80211_he_operation _oper; struct ieee80211_he_6ghz_oper _6ghz_oper; } __packed he; struct { struct ieee80211_eht_operation _oper; struct ieee80211_eht_operation_info _oper_info; } __packed eht; const struct ieee80211_eht_operation *eht_oper; if (conn->mode < IEEE80211_CONN_MODE_HE) { chandef->chan = NULL; return; } control_freq = chandef->chan->center_freq; center_freq1 = chandef->center_freq1; center_freq2 = chandef->center_freq2; chan_width = chandef->width; he._oper.he_oper_params = le32_encode_bits(1, IEEE80211_HE_OPERATION_6GHZ_OP_INFO); he._6ghz_oper.primary = ieee80211_frequency_to_channel(control_freq); he._6ghz_oper.ccfs0 = ieee80211_frequency_to_channel(center_freq1); he._6ghz_oper.ccfs1 = center_freq2 ? ieee80211_frequency_to_channel(center_freq2) : 0; switch (chan_width) { case NL80211_CHAN_WIDTH_320: he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -16 : 16; he._6ghz_oper.control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ; break; case NL80211_CHAN_WIDTH_160: he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? 
-8 : 8; fallthrough; case NL80211_CHAN_WIDTH_80P80: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; break; case NL80211_CHAN_WIDTH_80: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ; break; case NL80211_CHAN_WIDTH_40: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ; break; default: he._6ghz_oper.control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ; break; } if (conn->mode < IEEE80211_CONN_MODE_EHT) { eht_oper = NULL; } else { eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT; eht._oper_info.control = he._6ghz_oper.control; eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0; eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1; eht_oper = &eht._oper; } if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper, eht_oper, chandef)) chandef->chan = NULL; } int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, u32 vht_cap_info, struct ieee80211_conn_settings *conn, u8 *bssid, bool unprot_action, struct ieee80211_csa_ie *csa_ie) { enum nl80211_band new_band = current_band; int new_freq; u8 new_chan_no = 0, new_op_class = 0; struct ieee80211_channel *new_chan; struct cfg80211_chan_def new_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const struct ieee80211_bandwidth_indication *bwi; const struct ieee80211_ext_chansw_ie *ext_chansw_elem; int secondary_channel_offset = -1; memset(csa_ie, 0, sizeof(*csa_ie)); sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; bwi = elems->bandwidth_indication; ext_chansw_elem = elems->ext_chansw_ie; if (conn->mode < IEEE80211_CONN_MODE_HT || conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; } if (conn->mode < IEEE80211_CONN_MODE_VHT) wide_bw_chansw_ie = NULL; if (ext_chansw_elem) { new_op_class = ext_chansw_elem->new_operating_class; if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) { new_op_class = 0; if (!unprot_action) sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n", ext_chansw_elem->new_operating_class); } else { new_chan_no = ext_chansw_elem->new_ch_num; csa_ie->count = ext_chansw_elem->count; csa_ie->mode = ext_chansw_elem->mode; } } if (!new_op_class && elems->ch_switch_ie) { new_chan_no = elems->ch_switch_ie->new_ch_num; csa_ie->count = elems->ch_switch_ie->count; csa_ie->mode = elems->ch_switch_ie->mode; } /* nothing here we understand */ if (!new_chan_no) return 1; /* Mesh Channel Switch Parameters Element */ if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; csa_ie->pre_value = le16_to_cpu( elems->mesh_chansw_params_ie->mesh_pre_value); if (elems->mesh_chansw_params_ie->mesh_flags & WLAN_EID_CHAN_SWITCH_PARAM_REASON) csa_ie->reason_code = le16_to_cpu( elems->mesh_chansw_params_ie->mesh_reason); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) { if (!unprot_action) sdata_info(sdata, "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n", bssid, new_freq); return -EINVAL; } if (sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; } else if (conn->mode >= IEEE80211_CONN_MODE_HT) { /* If the secondary channel offset IE is not present, * we can't know what's the 
post-CSA offset, so the * best we can do is use 20MHz. */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } switch (secondary_channel_offset) { default: /* secondary_channel_offset was present but is invalid */ case IEEE80211_HT_PARAM_CHA_SEC_NONE: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT20); break; case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT40PLUS); break; case IEEE80211_HT_PARAM_CHA_SEC_BELOW: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT40MINUS); break; case -1: cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_NO_HT); /* keep width for 5/10 MHz channels */ switch (sdata->vif.bss_conf.chanreq.oper.width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: csa_ie->chanreq.oper.width = sdata->vif.bss_conf.chanreq.oper.width; break; default: break; } break; } /* capture the AP configuration */ csa_ie->chanreq.ap = csa_ie->chanreq.oper; /* parse one of the Elements to build a new chandef */ memset(&new_chandef, 0, sizeof(new_chandef)); new_chandef.chan = new_chan; if (bwi) { /* start with the CSA one */ new_chandef = csa_ie->chanreq.oper; /* and update the width accordingly */ ieee80211_chandef_eht_oper(&bwi->info, &new_chandef); if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT) new_chandef.punctured = get_unaligned_le16(bwi->info.optional); } else if (!wide_bw_chansw_ie || !wbcs_elem_to_chandef(wide_bw_chansw_ie, &new_chandef)) { if (!ieee80211_operating_class_to_chandef(new_op_class, new_chan, &new_chandef)) new_chandef = csa_ie->chanreq.oper; } /* check if the new chandef fits the capabilities */ if (new_band == NL80211_BAND_6GHZ) validate_chandef_by_6ghz_he_eht_oper(sdata, conn, &new_chandef); else validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info, &new_chandef); /* if data is there validate the bandwidth & use it */ if (new_chandef.chan) { /* capture the AP chandef before (potential) downgrading */ csa_ie->chanreq.ap = new_chandef; if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 && new_chandef.width == NL80211_CHAN_WIDTH_320) ieee80211_chandef_downgrade(&new_chandef, NULL); if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 && (new_chandef.width == NL80211_CHAN_WIDTH_80P80 || new_chandef.width == NL80211_CHAN_WIDTH_160)) ieee80211_chandef_downgrade(&new_chandef, NULL); if (!cfg80211_chandef_compatible(&new_chandef, &csa_ie->chanreq.oper)) { sdata_info(sdata, "BSS %pM: CSA has inconsistent channel data, disconnecting\n", bssid); return -EINVAL; } csa_ie->chanreq.oper = new_chandef; } if (elems->max_channel_switch_time) csa_ie->max_switch_time = (elems->max_channel_switch_time[0] << 0) | (elems->max_channel_switch_time[1] << 8) | (elems->max_channel_switch_time[2] << 16); return 0; } static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata, struct ieee80211_msrment_ie *request_ie, const u8 *da, const u8 *bssid, u8 dialog_token) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *msr_report; skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + sizeof(struct ieee80211_msrment_ie)); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); msr_report = skb_put_zero(skb, 24); memcpy(msr_report->da, da, ETH_ALEN); memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN); memcpy(msr_report->bssid, bssid, ETH_ALEN); msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + 
sizeof(msr_report->u.action.u.measurement)); msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; msr_report->u.action.u.measurement.action_code = WLAN_ACTION_SPCT_MSR_RPRT; msr_report->u.action.u.measurement.dialog_token = dialog_token; msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; msr_report->u.action.u.measurement.length = sizeof(struct ieee80211_msrment_ie); memset(&msr_report->u.action.u.measurement.msr_elem, 0, sizeof(struct ieee80211_msrment_ie)); msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; msr_report->u.action.u.measurement.msr_elem.mode |= IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; ieee80211_tx_skb(sdata, skb); } void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { /* * Ignoring measurement request is spec violation. * Mandatory measurements must be reported optional * measurements might be refused or reported incapable * For now just refuse * TODO: Answer basic measurement as unmeasured */ ieee80211_send_refuse_measurement_request(sdata, &mgmt->u.action.u.measurement.msr_elem, mgmt->sa, mgmt->bssid, mgmt->u.action.u.measurement.dialog_token); } |
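/*
 * Illustrative sketch (not part of this file): the CCFS0/CCFS1 check used in
 * wbcs_elem_to_chandef() above, worked through for a concrete case.  In the
 * deprecated "80 MHz" encoding of the Wide Bandwidth Channel Switch element,
 * CCFS1 carries the 160 MHz centre and CCFS0 the centre of the 80 MHz
 * segment that holds the primary channel, so the two differ by exactly
 * 8 channel numbers (40 MHz); a larger gap means non-contiguous 80+80 MHz.
 * The helper below is hypothetical and only restates that check.
 */
static bool example_wbcs_is_160mhz(u8 ccfs0, u8 ccfs1)
{
	/* e.g. a 160 MHz BSS on channels 36-64: ccfs0 == 42, ccfs1 == 50 */
	return ccfs1 && abs(ccfs0 - ccfs1) == 8;
}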
2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H #include <linux/perf_event.h> #include <uapi/linux/hw_breakpoint.h> #ifdef CONFIG_HAVE_HW_BREAKPOINT enum bp_type_idx { TYPE_INST = 0, #if defined(CONFIG_HAVE_MIXED_BREAKPOINTS_REGS) TYPE_DATA = 0, #else TYPE_DATA = 1, #endif TYPE_MAX }; extern int __init init_hw_breakpoint(void); static inline void hw_breakpoint_init(struct perf_event_attr *attr) { memset(attr, 0, sizeof(*attr)); attr->type = PERF_TYPE_BREAKPOINT; attr->size = sizeof(*attr); /* * As it's for in-kernel or ptrace use, we want it to be pinned * and to call its callback every hits. */ attr->pinned = 1; attr->sample_period = 1; } static inline void ptrace_breakpoint_init(struct perf_event_attr *attr) { hw_breakpoint_init(attr); attr->exclude_kernel = 1; } static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; } static inline int hw_breakpoint_type(struct perf_event *bp) { return bp->attr.bp_type; } static inline unsigned long hw_breakpoint_len(struct perf_event *bp) { return bp->attr.bp_len; } extern struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context, struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ extern int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); extern int modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, bool check); /* * Kernel breakpoints are not associated with any particular thread. 
*/ extern struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context, int cpu); extern struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); extern bool hw_breakpoint_is_used(void); extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) { return &bp->hw.info; } #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline int __init init_hw_breakpoint(void) { return 0; } static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context, struct task_struct *tsk) { return NULL; } static inline int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { return -ENOSYS; } static inline int modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, bool check) { return -ENOSYS; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context, int cpu) { return NULL; } static inline struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } static inline bool hw_breakpoint_is_used(void) { return false; } static inline int reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } static inline void release_bp_slot(struct perf_event *bp) { } static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) { return NULL; } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* _LINUX_HW_BREAKPOINT_H */ |
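/*
 * Illustrative sketch (not part of this header): registering a kernel-wide
 * data breakpoint on a variable, in the spirit of
 * samples/hw_breakpoint/data_breakpoint.c.  The watched symbol, the handler
 * body and the function names are hypothetical; hw_breakpoint_init(),
 * register_wide_hw_breakpoint() and unregister_wide_hw_breakpoint() are the
 * interfaces declared above.
 */
static struct perf_event * __percpu *example_wp;
static int example_watched_value;

static void example_wp_handler(struct perf_event *bp,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	pr_info("example_watched_value was accessed\n");
}

static int example_register_watchpoint(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = (unsigned long)&example_watched_value;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

	example_wp = register_wide_hw_breakpoint(&attr, example_wp_handler,
						 NULL);
	if (IS_ERR((void __force *)example_wp))
		return PTR_ERR((void __force *)example_wp);

	return 0;
}

static void example_unregister_watchpoint(void)
{
	unregister_wide_hw_breakpoint(example_wp);
}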
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 | // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <linux/cache.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/delay.h> #include <linux/sched.h> #include <linux/idr.h> #include <linux/rculist.h> #include <linux/nsproxy.h> #include <linux/fs.h> #include <linux/proc_ns.h> #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/net_namespace.h> #include <linux/sched/task.h> 
#include <linux/uidgid.h> #include <linux/cookie.h> #include <linux/proc_fs.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> /* * Our network namespace constructor/destructor lists */ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); /* Protects net_namespace_list. Nests iside rtnl_lock() */ DECLARE_RWSEM(net_rwsem); EXPORT_SYMBOL_GPL(net_rwsem); #ifdef CONFIG_KEYS static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif struct net init_net; EXPORT_SYMBOL(init_net); static bool init_net_initialized; /* * pernet_ops_rwsem: protects: pernet_list, net_generic_ids, * init_net_initialized and first_device pointer. * This is internal net namespace object. Please, don't use it * outside. */ DECLARE_RWSEM(pernet_ops_rwsem); EXPORT_SYMBOL_GPL(pernet_ops_rwsem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); unsigned int generic_size; struct net_generic *ng; generic_size = offsetof(struct net_generic, ptr[gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) ng->s.len = gen_ptrs; return ng; } static int net_assign_generic(struct net *net, unsigned int id, void *data) { struct net_generic *ng, *old_ng; BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); if (old_ng->s.len > id) { old_ng->ptr[id] = data; return 0; } ng = net_alloc_generic(); if (!ng) return -ENOMEM; /* * Some synchronisation notes: * * The net_generic explores the net->gen array inside rcu * read section. Besides once set the net->gen->ptr[x] * pointer never changes (see rules in netns/generic.h). * * That said, we simply duplicate this array and schedule * the old copy for kfree after a grace period. 
*/ memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); ng->ptr[id] = data; rcu_assign_pointer(net->gen, ng); kfree_rcu(old_ng, s.rcu); return 0; } static int ops_init(const struct pernet_operations *ops, struct net *net) { struct net_generic *ng; int err = -ENOMEM; void *data = NULL; if (ops->id) { data = kzalloc(ops->size, GFP_KERNEL); if (!data) goto out; err = net_assign_generic(net, *ops->id, data); if (err) goto cleanup; } err = 0; if (ops->init) err = ops->init(net); if (!err) return 0; if (ops->id) { ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); ng->ptr[*ops->id] = NULL; } cleanup: kfree(data); out: return err; } static void ops_pre_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { struct net *net; if (ops->pre_exit) { list_for_each_entry(net, net_exit_list, exit_list) ops->pre_exit(net); } } static void ops_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { struct net *net; if (ops->exit) { list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); cond_resched(); } } if (ops->exit_batch) ops->exit_batch(net_exit_list); } static void ops_free_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { struct net *net; if (ops->id) { list_for_each_entry(net, net_exit_list, exit_list) kfree(net_generic(net, *ops->id)); } } /* should be called with nsid_lock held */ static int alloc_netid(struct net *net, struct net *peer, int reqid) { int min = 0, max = 0; if (reqid >= 0) { min = reqid; max = reqid + 1; } return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); } /* This function is used by idr_for_each(). If net is equal to peer, the * function returns the id so that idr_for_each() stops. Because we cannot * returns the id 0 (idr_for_each() will not stop), we return the magic value * NET_ID_ZERO (-1) for it. */ #define NET_ID_ZERO -1 static int net_eq_idr(int id, void *net, void *peer) { if (net_eq(net, peer)) return id ? : NET_ID_ZERO; return 0; } /* Must be called from RCU-critical section or with nsid_lock held */ static int __peernet2id(const struct net *net, struct net *peer) { int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); /* Magic value for id 0. */ if (id == NET_ID_ZERO) return 0; if (id > 0) return id; return NETNSA_NSID_NOT_ASSIGNED; } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct nlmsghdr *nlh, gfp_t gfp); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { int id; if (refcount_read(&net->ns.count) == 0) return NETNSA_NSID_NOT_ASSIGNED; spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); if (id >= 0) { spin_unlock_bh(&net->nsid_lock); return id; } /* When peer is obtained from RCU lists, we may race with * its cleanup. Check whether it's alive, and this guarantees * we never hash a peer back to net->netns_ids, after it has * just been idr_remove()'d from there in cleanup_net(). */ if (!maybe_get_net(peer)) { spin_unlock_bh(&net->nsid_lock); return NETNSA_NSID_NOT_ASSIGNED; } id = alloc_netid(net, peer, -1); spin_unlock_bh(&net->nsid_lock); put_net(peer); if (id < 0) return NETNSA_NSID_NOT_ASSIGNED; rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); return id; } EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. 
*/ int peernet2id(const struct net *net, struct net *peer) { int id; rcu_read_lock(); id = __peernet2id(net, peer); rcu_read_unlock(); return id; } EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. */ bool peernet_has_id(const struct net *net, struct net *peer) { return peernet2id(net, peer) >= 0; } struct net *get_net_ns_by_id(const struct net *net, int id) { struct net *peer; if (id < 0) return NULL; rcu_read_lock(); peer = idr_find(&net->netns_ids, id); if (peer) peer = maybe_get_net(peer); rcu_read_unlock(); return peer; } EXPORT_SYMBOL_GPL(get_net_ns_by_id); static __net_init void preinit_net_sysctl(struct net *net) { net->core.sysctl_somaxconn = SOMAXCONN; /* Limits per socket sk_omem_alloc usage. * TCP zerocopy regular usage needs 128 KB. */ net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; } /* init code that must occur even if setup_net() is not called. */ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns) { refcount_set(&net->passive, 1); refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt"); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); preinit_net_sysctl(net); } /* * setup_net runs the initializers for the network namespace object. */ static __net_init int setup_net(struct net *net) { /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; LIST_HEAD(net_exit_list); LIST_HEAD(dev_kill_list); int error = 0; preempt_disable(); net->net_cookie = gen_cookie_next(&net_cookie); preempt_enable(); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) goto out_undo; } down_write(&net_rwsem); list_add_tail_rcu(&net->list, &net_namespace_list); up_write(&net_rwsem); out: return error; out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. 
*/ list_add(&net->exit_list, &net_exit_list); saved_ops = ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_pre_exit_list(ops, &net_exit_list); synchronize_rcu(); ops = saved_ops; rtnl_lock(); list_for_each_entry_continue_reverse(ops, &pernet_list, list) { if (ops->exit_batch_rtnl) ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); } unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); rcu_barrier(); goto out; } #ifdef CONFIG_NET_NS static struct ucounts *inc_net_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); } static void dec_net_namespaces(struct ucounts *ucounts) { dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); } static struct kmem_cache *net_cachep __ro_after_init; static struct workqueue_struct *netns_wq; static struct net *net_alloc(void) { struct net *net = NULL; struct net_generic *ng; ng = net_alloc_generic(); if (!ng) goto out; net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); if (!net) goto out_free; #ifdef CONFIG_KEYS net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL); if (!net->key_domain) goto out_free_2; refcount_set(&net->key_domain->usage, 1); #endif rcu_assign_pointer(net->gen, ng); out: return net; #ifdef CONFIG_KEYS out_free_2: kmem_cache_free(net_cachep, net); net = NULL; #endif out_free: kfree(ng); goto out; } static void net_free(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); /* There should not be any trackers left there. */ ref_tracker_dir_exit(&net->notrefcnt_tracker); kmem_cache_free(net_cachep, net); } } void net_drop_ns(void *p) { struct net *net = (struct net *)p; if (net) net_free(net); } struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net) { struct ucounts *ucounts; struct net *net; int rv; if (!(flags & CLONE_NEWNET)) return get_net(old_net); ucounts = inc_net_namespaces(user_ns); if (!ucounts) return ERR_PTR(-ENOSPC); net = net_alloc(); if (!net) { rv = -ENOMEM; goto dec_ucounts; } preinit_net(net, user_ns); net->ucounts = ucounts; get_user_ns(user_ns); rv = down_read_killable(&pernet_ops_rwsem); if (rv < 0) goto put_userns; rv = setup_net(net); up_read(&pernet_ops_rwsem); if (rv < 0) { put_userns: #ifdef CONFIG_KEYS key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); net_free(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); } return net; } /** * net_ns_get_ownership - get sysfs ownership data for @net * @net: network namespace in question (can be NULL) * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects * * Returns the uid/gid pair of root in the user namespace associated with the * given network namespace. 
*/ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) { if (net) { kuid_t ns_root_uid = make_kuid(net->user_ns, 0); kgid_t ns_root_gid = make_kgid(net->user_ns, 0); if (uid_valid(ns_root_uid)) *uid = ns_root_uid; if (gid_valid(ns_root_gid)) *gid = ns_root_gid; } else { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; } } EXPORT_SYMBOL_GPL(net_ns_get_ownership); static void unhash_nsid(struct net *net, struct net *last) { struct net *tmp; /* This function is only called from cleanup_net() work, * and this work is the only process, that may delete * a net from net_namespace_list. So, when the below * is executing, the list may only grow. Thus, we do not * use for_each_net_rcu() or net_rwsem. */ for_each_net(tmp) { int id; spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); spin_unlock_bh(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); LIST_HEAD(dev_kill_list); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); down_read(&pernet_ops_rwsem); /* Don't let anyone else find us. */ down_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) list_del_rcu(&net->list); /* Cache last net. After we unlock rtnl, no one new net * added to net_namespace_list can assign nsid pointer * to a net from net_kill_list (see peernet2id_alloc()). * So, we skip them in unhash_nsid(). * * Note, that unhash_nsid() does not delete nsid links * between net_kill_list's nets, as they've already * deleted from net_namespace_list. But, this would be * useless anyway, as netns_ids are destroyed there. */ last = list_last_entry(&net_namespace_list, struct net, list); up_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) { unhash_nsid(net, last); list_add_tail(&net->exit_list, &net_exit_list); } /* Run all of the network namespace pre_exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_pre_exit_list(ops, &net_exit_list); /* * Another CPU might be rcu-iterating the list, wait for it. * This needs to be before calling the exit() notifiers, so * the rcu_barrier() below isn't sufficient alone. * Also the pre_exit() and exit() methods need this barrier. */ synchronize_rcu_expedited(); rtnl_lock(); list_for_each_entry_reverse(ops, &pernet_list, list) { if (ops->exit_batch_rtnl) ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); } unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); /* Free the net generic variables */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); up_read(&pernet_ops_rwsem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. 
*/ rcu_barrier(); /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); #ifdef CONFIG_KEYS key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); net_free(net); } } /** * net_ns_barrier - wait until concurrent net_cleanup_work is done * * cleanup_net runs from work queue and will first remove namespaces * from the global list, then run net exit functions. * * Call this in module exit path to make sure that all netns * ->exit ops have been invoked before the function is removed. */ void net_ns_barrier(void) { down_write(&pernet_ops_rwsem); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL(net_ns_barrier); static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { ref_tracker_dir_exit(&net->refcnt_tracker); /* Cleanup the network namespace in process context */ if (llist_add(&net->cleanup_list, &cleanup_list)) queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); /** * get_net_ns - increment the refcount of the network namespace * @ns: common namespace (net) * * Returns the net's common namespace or ERR_PTR() if ref is zero. */ struct ns_common *get_net_ns(struct ns_common *ns) { struct net *net; net = maybe_get_net(container_of(ns, struct net, ns)); if (net) return &net->ns; return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(get_net_ns); struct net *get_net_ns_by_fd(int fd) { struct fd f = fdget(fd); struct net *net = ERR_PTR(-EINVAL); if (!fd_file(f)) return ERR_PTR(-EBADF); if (proc_ns_file(fd_file(f))) { struct ns_common *ns = get_proc_ns(file_inode(fd_file(f))); if (ns->ops == &netns_operations) net = get_net(container_of(ns, struct net, ns)); } fdput(f); return net; } EXPORT_SYMBOL_GPL(get_net_ns_by_fd); #endif struct net *get_net_ns_by_pid(pid_t pid) { struct task_struct *tsk; struct net *net; /* Lookup the network namespace */ net = ERR_PTR(-ESRCH); rcu_read_lock(); tsk = find_task_by_vpid(pid); if (tsk) { struct nsproxy *nsproxy; task_lock(tsk); nsproxy = tsk->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); task_unlock(tsk); } rcu_read_unlock(); return net; } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); static __net_init int net_ns_net_init(struct net *net) { #ifdef CONFIG_NET_NS net->ns.ops = &netns_operations; #endif return ns_alloc_inum(&net->ns); } static __net_exit void net_ns_net_exit(struct net *net) { ns_free_inum(&net->ns); } static struct pernet_operations __net_initdata net_ns_ops = { .init = net_ns_net_init, .exit = net_ns_net_exit, }; static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { [NETNSA_NONE] = { .type = NLA_UNSPEC }, [NETNSA_NSID] = { .type = NLA_S32 }, [NETNSA_PID] = { .type = NLA_U32 }, [NETNSA_FD] = { .type = NLA_U32 }, [NETNSA_TARGET_NSID] = { .type = NLA_S32 }, }; static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; struct nlattr *nla; struct net *peer; int nsid, err; err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); if (err < 0) return err; if (!tb[NETNSA_NSID]) { NL_SET_ERR_MSG(extack, "nsid is missing"); return -EINVAL; } nsid = nla_get_s32(tb[NETNSA_NSID]); if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); nla = tb[NETNSA_PID]; } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); nla = tb[NETNSA_FD]; } else { 
NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; } if (IS_ERR(peer)) { NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); } spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { spin_unlock_bh(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, "Peer netns already has a nsid assigned"); goto out; } err = alloc_netid(net, peer, nsid); spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); err = 0; } else if (err == -ENOSPC && nsid >= 0) { err = -EEXIST; NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); NL_SET_ERR_MSG(extack, "The specified nsid is already used"); } out: put_net(peer); return err; } static int rtnl_net_get_size(void) { return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */ ; } struct net_fill_args { u32 portid; u32 seq; int flags; int cmd; int nsid; bool add_ref; int ref_nsid; }; static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args) { struct nlmsghdr *nlh; struct rtgenmsg *rth; nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth), args->flags); if (!nlh) return -EMSGSIZE; rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; if (nla_put_s32(skb, NETNSA_NSID, args->nsid)) goto nla_put_failure; if (args->add_ref && nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int rtnl_net_valid_getid_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { int i, err; if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); if (err) return err; for (i = 0; i <= NETNSA_MAX; i++) { if (!tb[i]) continue; switch (i) { case NETNSA_PID: case NETNSA_FD: case NETNSA_NSID: case NETNSA_TARGET_NSID: break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request"); return -EINVAL; } } return 0; } static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; struct net_fill_args fillargs = { .portid = NETLINK_CB(skb).portid, .seq = nlh->nlmsg_seq, .cmd = RTM_NEWNSID, }; struct net *peer, *target = net; struct nlattr *nla; struct sk_buff *msg; int err; err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); if (err < 0) return err; if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); nla = tb[NETNSA_PID]; } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); nla = tb[NETNSA_FD]; } else if (tb[NETNSA_NSID]) { peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID])); if (!peer) peer = ERR_PTR(-ENOENT); nla = tb[NETNSA_NSID]; } else { NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; } if (IS_ERR(peer)) { NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); } if (tb[NETNSA_TARGET_NSID]) { int id = nla_get_s32(tb[NETNSA_TARGET_NSID]); target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id); if (IS_ERR(target)) { NL_SET_BAD_ATTR(extack, 
tb[NETNSA_TARGET_NSID]); NL_SET_ERR_MSG(extack, "Target netns reference is invalid"); err = PTR_ERR(target); goto out; } fillargs.add_ref = true; fillargs.ref_nsid = peernet2id(net, peer); } msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); if (!msg) { err = -ENOMEM; goto out; } fillargs.nsid = peernet2id(target, peer); err = rtnl_net_fill(msg, &fillargs); if (err < 0) goto err_out; err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); goto out; err_out: nlmsg_free(msg); out: if (fillargs.add_ref) put_net(target); put_net(peer); return err; } struct rtnl_net_dump_cb { struct net *tgt_net; struct net *ref_net; struct sk_buff *skb; struct net_fill_args fillargs; int idx; int s_idx; }; /* Runs in RCU-critical section. */ static int rtnl_net_dumpid_one(int id, void *peer, void *data) { struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; int ret; if (net_cb->idx < net_cb->s_idx) goto cont; net_cb->fillargs.nsid = id; if (net_cb->fillargs.add_ref) net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer); ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs); if (ret < 0) return ret; cont: net_cb->idx++; return 0; } static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, struct rtnl_net_dump_cb *net_cb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[NETNSA_MAX + 1]; int err, i; err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); if (err < 0) return err; for (i = 0; i <= NETNSA_MAX; i++) { if (!tb[i]) continue; if (i == NETNSA_TARGET_NSID) { struct net *net; net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i])); if (IS_ERR(net)) { NL_SET_BAD_ATTR(extack, tb[i]); NL_SET_ERR_MSG(extack, "Invalid target network namespace id"); return PTR_ERR(net); } net_cb->fillargs.add_ref = true; net_cb->ref_net = net_cb->tgt_net; net_cb->tgt_net = net; } else { NL_SET_BAD_ATTR(extack, tb[i]); NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; } } return 0; } static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) { struct rtnl_net_dump_cb net_cb = { .tgt_net = sock_net(skb->sk), .skb = skb, .fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = cb->nlh->nlmsg_seq, .flags = NLM_F_MULTI, .cmd = RTM_NEWNSID, }, .idx = 0, .s_idx = cb->args[0], }; int err = 0; if (cb->strict_check) { err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb); if (err < 0) goto end; } rcu_read_lock(); idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb); rcu_read_unlock(); cb->args[0] = net_cb.idx; end: if (net_cb.fillargs.add_ref) put_net(net_cb.tgt_net); return err; } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct nlmsghdr *nlh, gfp_t gfp) { struct net_fill_args fillargs = { .portid = portid, .seq = nlh ? 
nlh->nlmsg_seq : 0, .cmd = cmd, .nsid = id, }; struct sk_buff *msg; int err = -ENOMEM; msg = nlmsg_new(rtnl_net_get_size(), gfp); if (!msg) goto out; err = rtnl_net_fill(msg, &fillargs); if (err < 0) goto err_out; rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp); return; err_out: nlmsg_free(msg); out: rtnl_set_sk_err(net, RTNLGRP_NSID, err); } #ifdef CONFIG_NET_NS static void __init netns_ipv4_struct_check(void) { /* TX readonly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_early_retrans); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_tso_win_divisor); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_tso_rtt_log); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_autocorking); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_min_snd_mss); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_notsent_lowat); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_limit_output_bytes); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_min_rtt_wlen); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_wmem); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_ip_fwd_use_pmtu); CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33); /* TXRX readonly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx, sysctl_tcp_moderate_rcvbuf); CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1); /* RX readonly hotpath cache line */ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_ip_early_demux); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_early_demux); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_reordering); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_rmem); CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18); } #endif void __init net_ns_init(void) { struct net_generic *ng; #ifdef CONFIG_NET_NS netns_ipv4_struct_check(); net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Create workqueue for cleanup */ netns_wq = create_singlethread_workqueue("netns"); if (!netns_wq) panic("Could not create netns workq"); #endif ng = net_alloc_generic(); if (!ng) panic("Could not allocate generic netns"); rcu_assign_pointer(init_net.gen, ng); #ifdef CONFIG_KEYS init_net.key_domain = &init_net_key_domain; #endif preinit_net(&init_net, &init_user_ns); down_write(&pernet_ops_rwsem); if (setup_net(&init_net)) panic("Could not setup the initial network namespace"); init_net_initialized = true; up_write(&pernet_ops_rwsem); if (register_pernet_subsys(&net_ns_ops)) panic("Could not register network namespace subsystems"); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); } static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) { ops_pre_exit_list(ops, net_exit_list); synchronize_rcu(); if (ops->exit_batch_rtnl) { LIST_HEAD(dev_kill_list); rtnl_lock(); ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); 
unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); } ops_exit_list(ops, net_exit_list); ops_free_list(ops, net_exit_list); } #ifdef CONFIG_NET_NS static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { struct net *net; int error; LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); if (ops->init || ops->id) { /* We held write locked pernet_ops_rwsem, and parallel * setup_net() and cleanup_net() are not possible. */ for_each_net(net) { error = ops_init(ops, net); if (error) goto out_undo; list_add_tail(&net->exit_list, &net_exit_list); } } return 0; out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); free_exit_list(ops, &net_exit_list); return error; } static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; LIST_HEAD(net_exit_list); list_del(&ops->list); /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); free_exit_list(ops, &net_exit_list); } #else static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { if (!init_net_initialized) { list_add_tail(&ops->list, list); return 0; } return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { if (!init_net_initialized) { list_del(&ops->list); } else { LIST_HEAD(net_exit_list); list_add(&init_net.exit_list, &net_exit_list); free_exit_list(ops, &net_exit_list); } } #endif /* CONFIG_NET_NS */ static DEFINE_IDA(net_generic_ids); static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { int error; if (WARN_ON(!!ops->id ^ !!ops->size)) return -EINVAL; if (ops->id) { error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID, GFP_KERNEL); if (error < 0) return error; *ops->id = error; /* This does not require READ_ONCE as writers already hold * pernet_ops_rwsem. But WRITE_ONCE is needed to protect * net_alloc_generic. */ WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1)); } error = __register_pernet_operations(list, ops); if (error) { rcu_barrier(); if (ops->id) ida_free(&net_generic_ids, *ops->id); } return error; } static void unregister_pernet_operations(struct pernet_operations *ops) { __unregister_pernet_operations(ops); rcu_barrier(); if (ops->id) ida_free(&net_generic_ids, *ops->id); } /** * register_pernet_subsys - register a network namespace subsystem * @ops: pernet operations structure for the subsystem * * Register a subsystem which has init and exit functions * that are called when network namespaces are created and * destroyed respectively. * * When registered all network namespace init functions are * called for every existing network namespace. Allowing kernel * modules to have a race free view of the set of network namespaces. * * When a new network namespace is created all of the init * methods are called in the order in which they were registered. * * When a network namespace is destroyed all of the exit methods * are called in the reverse of the order with which they were * registered. 
*/ int register_pernet_subsys(struct pernet_operations *ops) { int error; down_write(&pernet_ops_rwsem); error = register_pernet_operations(first_device, ops); up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_subsys); /** * unregister_pernet_subsys - unregister a network namespace subsystem * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. */ void unregister_pernet_subsys(struct pernet_operations *ops) { down_write(&pernet_ops_rwsem); unregister_pernet_operations(ops); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem * * Register a device which has init and exit functions * that are called when network namespaces are created and * destroyed respectively. * * When registered all network namespace init functions are * called for every existing network namespace. Allowing kernel * modules to have a race free view of the set of network namespaces. * * When a new network namespace is created all of the init * methods are called in the order in which they were registered. * * When a network namespace is destroyed all of the exit methods * are called in the reverse of the order with which they were * registered. */ int register_pernet_device(struct pernet_operations *ops) { int error; down_write(&pernet_ops_rwsem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. */ void unregister_pernet_device(struct pernet_operations *ops) { down_write(&pernet_ops_rwsem); if (&ops->list == first_device) first_device = first_device->next; unregister_pernet_operations(ops); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_device); #ifdef CONFIG_NET_NS static struct ns_common *netns_get(struct task_struct *task) { struct net *net = NULL; struct nsproxy *nsproxy; task_lock(task); nsproxy = task->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); task_unlock(task); return net ? &net->ns : NULL; } static inline struct net *to_net_ns(struct ns_common *ns) { return container_of(ns, struct net, ns); } static void netns_put(struct ns_common *ns) { put_net(to_net_ns(ns)); } static int netns_install(struct nsset *nsset, struct ns_common *ns) { struct nsproxy *nsproxy = nsset->nsproxy; struct net *net = to_net_ns(ns); if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) return -EPERM; put_net(nsproxy->net_ns); nsproxy->net_ns = get_net(net); return 0; } static struct user_namespace *netns_owner(struct ns_common *ns) { return to_net_ns(ns)->user_ns; } const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, .owner = netns_owner, }; #endif |
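/*
 * Illustrative sketch (not part of the file above): the typical consumer of
 * register_pernet_subsys() and the ops->id/ops->size mechanism.  Setting
 * both fields makes ops_init() kzalloc() a per-namespace blob and publish it
 * through net_assign_generic(), so net_generic(net, id) can retrieve it
 * later.  The names demo_net, demo_net_id and the conn_count field are
 * hypothetical and exist only for illustration.
 */
#include <linux/atomic.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct demo_net {
	atomic_t conn_count;		/* hypothetical per-namespace state */
};

static unsigned int demo_net_id;

static int __net_init demo_net_init(struct net *net)
{
	struct demo_net *dn = net_generic(net, demo_net_id);

	/* ops_init() already zeroed the area; just finish the setup */
	atomic_set(&dn->conn_count, 0);
	return 0;
}

static void __net_exit demo_net_exit(struct net *net)
{
	/* per-namespace teardown; the blob itself is freed by ops_free_list() */
}

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,
	.exit = demo_net_exit,
	.id   = &demo_net_id,
	.size = sizeof(struct demo_net),
};

static int __init demo_init(void)
{
	/* runs demo_net_init() for every namespace that already exists */
	return register_pernet_subsys(&demo_net_ops);
}

static void __exit demo_exit(void)
{
	/* runs demo_net_exit() for all namespaces still around */
	unregister_pernet_subsys(&demo_net_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");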
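/*
 * Hypothetical standalone user-space sketch of the netlink protocol served
 * by rtnl_net_getid()/rtnl_net_fill() above: send RTM_GETNSID with a
 * NETNSA_FD attribute referring to a namespace file (e.g. /run/netns/<name>
 * or /proc/<pid>/ns/net) and read NETNSA_NSID out of the RTM_NEWNSID reply.
 * A reported value of -1 means no nsid has been assigned yet.  Error
 * handling is deliberately minimal; this is a sketch, not production code.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/net_namespace.h>

int main(int argc, char **argv)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct rtgenmsg rtgen;
		char attrs[64];
	} req;
	struct nlmsghdr *rsp;
	struct rtattr *rta;
	char buf[4096];
	int fd, sk, len, attrlen;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <netns file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0 || sk < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
	req.nlh.nlmsg_type = RTM_GETNSID;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtgen.rtgen_family = AF_UNSPEC;

	/* append the NETNSA_FD (u32) attribute after the aligned header */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = NETNSA_FD;
	rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
	memcpy(RTA_DATA(rta), &fd, sizeof(uint32_t));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	if (sendto(sk, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
		return 1;

	len = recv(sk, buf, sizeof(buf), 0);
	for (rsp = (struct nlmsghdr *)buf; NLMSG_OK(rsp, len);
	     rsp = NLMSG_NEXT(rsp, len)) {
		if (rsp->nlmsg_type != RTM_NEWNSID)
			continue;
		rta = (struct rtattr *)((char *)rsp +
					NLMSG_SPACE(sizeof(struct rtgenmsg)));
		attrlen = rsp->nlmsg_len - NLMSG_SPACE(sizeof(struct rtgenmsg));
		for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen))
			if (rta->rta_type == NETNSA_NSID)
				printf("nsid: %d\n", *(int32_t *)RTA_DATA(rta));
	}
	return 0;
}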
// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/trace.c * * Copyright (C) 2006 Linus Torvalds * * Trace facility for suspend/resume problems, when none of the * devices may be working. */ #define pr_fmt(fmt) "PM: " fmt #include <linux/pm-trace.h> #include <linux/export.h> #include <linux/rtc.h> #include <linux/suspend.h> #include <linux/init.h> #include <linux/mc146818rtc.h> #include "power.h" /* * Horrid, horrid, horrid. * * It turns out that the _only_ piece of hardware that actually * keeps its value across a hard boot (and, more importantly, the * POST init sequence) is literally the realtime clock. * * Never mind that an RTC chip has 114 bytes (and often a whole * other bank of an additional 128 bytes) of nice SRAM that is * _designed_ to keep data - the POST will clear it. So we literally * can just use the few bytes of actual time data, which means that * we're really limited. * * It means, for example, that we can't use the seconds at all * (since the time between the hang and the boot might be more * than a minute), and we'd better not depend on the low bits of * the minutes either. * * There are the wday fields etc, but I wouldn't guarantee those * are dependable either. And if the date isn't valid, either the * hw or POST will do strange things. * * So we're left with: * - year: 0-99 * - month: 0-11 * - day-of-month: 1-28 * - hour: 0-23 * - min: (0-20)*3 * * Giving us a total range of 0-16128000 (0xf61800), ie less * than 24 bits of actual data we can save across reboots. * * And if your box can't boot in less than three minutes, * you're screwed. * * Now, almost 24 bits of data is pitifully small, so we need * to be pretty dense if we want to use it for anything nice. * What we do is that instead of saving off nice readable info, * we save off _hashes_ of information that we can hopefully * regenerate after the reboot. * * In particular, this means that we might be unlucky, and hit * a case where we have a hash collision, and we end up not * being able to tell for certain exactly which case happened. * But that's hopefully unlikely. 
* * What we do is to take the bits we can fit, and split them * into three parts (16*997*1009 = 16095568), and use the values * for: * - 0-15: user-settable * - 0-996: file + line number * - 0-1008: device */ #define USERHASH (16) #define FILEHASH (997) #define DEVHASH (1009) #define DEVSEED (7919) bool pm_trace_rtc_abused __read_mostly; EXPORT_SYMBOL_GPL(pm_trace_rtc_abused); static unsigned int dev_hash_value; static int set_magic_time(unsigned int user, unsigned int file, unsigned int device) { unsigned int n = user + USERHASH*(file + FILEHASH*device); // June 7th, 2006 static struct rtc_time time = { .tm_sec = 0, .tm_min = 0, .tm_hour = 0, .tm_mday = 7, .tm_mon = 5, // June - counting from zero .tm_year = 106, .tm_wday = 3, .tm_yday = 160, .tm_isdst = 1 }; time.tm_year = (n % 100); n /= 100; time.tm_mon = (n % 12); n /= 12; time.tm_mday = (n % 28) + 1; n /= 28; time.tm_hour = (n % 24); n /= 24; time.tm_min = (n % 20) * 3; n /= 20; mc146818_set_time(&time); pm_trace_rtc_abused = true; return n ? -1 : 0; } static unsigned int read_magic_time(void) { struct rtc_time time; unsigned int val; if (mc146818_get_time(&time, 1000) < 0) { pr_err("Unable to read current time from RTC\n"); return 0; } pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time); val = time.tm_year; /* 100 years */ if (val > 100) val -= 100; val += time.tm_mon * 100; /* 12 months */ val += (time.tm_mday-1) * 100 * 12; /* 28 month-days */ val += time.tm_hour * 100 * 12 * 28; /* 24 hours */ val += (time.tm_min / 3) * 100 * 12 * 28 * 24; /* 20 3-minute intervals */ return val; } /* * This is just the sdbm hash function with a user-supplied * seed and final size parameter. */ static unsigned int hash_string(unsigned int seed, const char *data, unsigned int mod) { unsigned char c; while ((c = *data++) != 0) { seed = (seed << 16) + (seed << 6) - seed + c; } return seed % mod; } void set_trace_device(struct device *dev) { dev_hash_value = hash_string(DEVSEED, dev_name(dev), DEVHASH); } EXPORT_SYMBOL(set_trace_device); /* * We could just take the "tracedata" index into the .tracedata * section instead. Generating a hash of the data gives us a * chance to work across kernel versions, and perhaps more * importantly it also gives us valid/invalid check (ie we will * likely not give totally bogus reports - if the hash matches, * it's not any guarantee, but it's a high _likelihood_ that * the match is valid). 
*/ void generate_pm_trace(const void *tracedata, unsigned int user) { unsigned short lineno = *(unsigned short *)tracedata; const char *file = *(const char **)(tracedata + 2); unsigned int user_hash_value, file_hash_value; if (!x86_platform.legacy.rtc) return; user_hash_value = user % USERHASH; file_hash_value = hash_string(lineno, file, FILEHASH); set_magic_time(user_hash_value, file_hash_value, dev_hash_value); } EXPORT_SYMBOL(generate_pm_trace); extern char __tracedata_start[], __tracedata_end[]; static int show_file_hash(unsigned int value) { int match; char *tracedata; match = 0; for (tracedata = __tracedata_start ; tracedata < __tracedata_end ; tracedata += 2 + sizeof(unsigned long)) { unsigned short lineno = *(unsigned short *)tracedata; const char *file = *(const char **)(tracedata + 2); unsigned int hash = hash_string(lineno, file, FILEHASH); if (hash != value) continue; pr_info(" hash matches %s:%u\n", file, lineno); match++; } return match; } static int show_dev_hash(unsigned int value) { int match = 0; struct list_head *entry; device_pm_lock(); entry = dpm_list.prev; while (entry != &dpm_list) { struct device * dev = to_device(entry); unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH); if (hash == value) { dev_info(dev, "hash matches\n"); match++; } entry = entry->prev; } device_pm_unlock(); return match; } static unsigned int hash_value_early_read; int show_trace_dev_match(char *buf, size_t size) { unsigned int value = hash_value_early_read / (USERHASH * FILEHASH); int ret = 0; struct list_head *entry; /* * It's possible that multiple devices will match the hash and we can't * tell which is the culprit, so it's best to output them all. */ device_pm_lock(); entry = dpm_list.prev; while (size && entry != &dpm_list) { struct device *dev = to_device(entry); unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH); if (hash == value) { int len = snprintf(buf, size, "%s\n", dev_driver_string(dev)); if (len > size) len = size; buf += len; ret += len; size -= len; } entry = entry->prev; } device_pm_unlock(); return ret; } static int pm_trace_notify(struct notifier_block *nb, unsigned long mode, void *_unused) { switch (mode) { case PM_POST_HIBERNATION: case PM_POST_SUSPEND: if (pm_trace_rtc_abused) { pm_trace_rtc_abused = false; pr_warn("Possible incorrect RTC due to pm_trace, please use 'ntpdate' or 'rdate' to reset it.\n"); } break; default: break; } return 0; } static struct notifier_block pm_trace_nb = { .notifier_call = pm_trace_notify, }; static int __init early_resume_init(void) { if (!x86_platform.legacy.rtc) return 0; hash_value_early_read = read_magic_time(); register_pm_notifier(&pm_trace_nb); return 0; } static int __init late_resume_init(void) { unsigned int val = hash_value_early_read; unsigned int user, file, dev; if (!x86_platform.legacy.rtc) return 0; user = val % USERHASH; val = val / USERHASH; file = val % FILEHASH; val = val / FILEHASH; dev = val /* % DEVHASH */; pr_info(" Magic number: %d:%d:%d\n", user, file, dev); show_file_hash(file); show_dev_hash(dev); return 0; } core_initcall(early_resume_init); late_initcall(late_resume_init); |
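/*
 * Standalone user-space sketch (not part of the driver above) of the
 * arithmetic it uses: pack the user/file/device hashes into fake RTC fields
 * exactly the way set_magic_time() does, then recover the value the way
 * read_magic_time() and late_resume_init() do.  The file name, device name
 * and line number fed to hash_string() below are made up for illustration.
 */
#include <stdio.h>

#define USERHASH 16
#define FILEHASH 997
#define DEVHASH  1009
#define DEVSEED  7919

struct fake_rtc { unsigned int year, mon, mday, hour, min; };

static unsigned int hash_string(unsigned int seed, const char *data,
				unsigned int mod)
{
	unsigned char c;

	/* same sdbm-style hash as in the driver */
	while ((c = *data++) != 0)
		seed = (seed << 16) + (seed << 6) - seed + c;
	return seed % mod;
}

static void encode(unsigned int user, unsigned int file, unsigned int dev,
		   struct fake_rtc *rtc)
{
	unsigned int n = user + USERHASH * (file + FILEHASH * dev);

	rtc->year = n % 100; n /= 100;		/* 100 years */
	rtc->mon  = n % 12;  n /= 12;		/* 12 months */
	rtc->mday = n % 28 + 1; n /= 28;	/* 28 month-days */
	rtc->hour = n % 24;  n /= 24;		/* 24 hours */
	rtc->min  = (n % 20) * 3;		/* 20 three-minute slots */
}

static unsigned int decode(const struct fake_rtc *rtc)
{
	unsigned int val = rtc->year;

	val += rtc->mon * 100;
	val += (rtc->mday - 1) * 100 * 12;
	val += rtc->hour * 100 * 12 * 28;
	val += (rtc->min / 3) * 100 * 12 * 28 * 24;
	return val;
}

int main(void)
{
	unsigned int user = 3;				/* would be user % USERHASH */
	unsigned int file = hash_string(1234, "drivers/example.c", FILEHASH);
	unsigned int dev  = hash_string(DEVSEED, "0000:00:1d.0", DEVHASH);
	struct fake_rtc rtc;
	unsigned int val;

	encode(user, file, dev, &rtc);
	val = decode(&rtc);

	/* same unpacking order as late_resume_init() */
	printf("user=%u file=%u dev=%u\n",
	       val % USERHASH, (val / USERHASH) % FILEHASH,
	       val / USERHASH / FILEHASH);
	return 0;
}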
// SPDX-License-Identifier: GPL-2.0-only /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic TIME_WAIT sockets functions * * From code originally in TCP */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/ip.h> /** * inet_twsk_bind_unhash - unhash a timewait socket from bind hash * @tw: timewait socket * @hashinfo: hashinfo pointer * * unhash a timewait socket from bind hash, if hashed. * bind hash lock must be held by caller. * The bind-hash reference on the socket is dropped here via __sock_put(). */ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind2_bucket *tb2 = tw->tw_tb2; struct inet_bind_bucket *tb = tw->tw_tb; if (!tb) return; __sk_del_bind_node((struct sock *)tw); tw->tw_tb = NULL; tw->tw_tb2 = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); __sock_put((struct sock *)tw); } /* Must be called with locally disabled BHs. */ static void inet_twsk_kill(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); struct inet_bind_hashbucket *bhead, *bhead2; spin_lock(lock); sk_nulls_del_node_init_rcu((struct sock *)tw); spin_unlock(lock); /* Disassociate with bind bucket. 
*/ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, twsk_net(tw), tw->tw_num); spin_lock(&bhead->lock); spin_lock(&bhead2->lock); inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); refcount_dec(&tw->tw_dr->tw_refcount); inet_twsk_put(tw); } void inet_twsk_free(struct inet_timewait_sock *tw) { struct module *owner = tw->tw_prot->owner; twsk_destructor((struct sock *)tw); kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } void inet_twsk_put(struct inet_timewait_sock *tw) { if (refcount_dec_and_test(&tw->tw_refcnt)) inet_twsk_free(tw); } EXPORT_SYMBOL_GPL(inet_twsk_put); static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&tw->tw_node, list); } static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, false); } /* * Enter the time wait state. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. * * The caller must not access @tw anymore after this function returns. */ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo, int timeo) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead, *bhead2; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); local_bh_disable(); spin_lock(&bhead->lock); spin_lock(&bhead2->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); tw->tw_tb2 = icsk->icsk_bind2_hash; WARN_ON(!icsk->icsk_bind2_hash); sk_add_bind_node((struct sock *)tw, &tw->tw_tb2->owners); spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); spin_lock(lock); /* Step 2: Hash TW into tcp ehash chain */ inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); /* Ensure above writes are committed into memory before updating the * refcount. * Provides ordering vs later refcount_inc(). */ smp_wmb(); /* tw_refcnt is set to 3 because we have : * - one reference for bhash chain. * - one reference for ehash chain. * - one reference for timer. * Also note that after this point, we lost our implicit reference * so we are not allowed to use tw anymore. 
*/ refcount_set(&tw->tw_refcnt, 3); inet_twsk_schedule(tw, timeo); spin_unlock(lock); local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule); static void tw_timer_handler(struct timer_list *t) { struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer); inet_twsk_kill(tw); } struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state) { struct inet_timewait_sock *tw; if (refcount_read(&dr->tw_refcount) - 1 >= READ_ONCE(dr->sysctl_max_tw_buckets)) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, GFP_ATOMIC); if (tw) { const struct inet_sock *inet = inet_sk(sk); tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; tw->tw_tos = inet->tos; tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; tw->tw_sport = inet->inet_sport; tw->tw_dport = inet->inet_dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; tw->tw_reuseport = sk->sk_reuseport; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); timer_setup(&tw->tw_timer, tw_timer_handler, 0); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this * timewait socket. */ refcount_set(&tw->tw_refcnt, 0); __module_get(tw->tw_prot->owner); } return tw; } EXPORT_SYMBOL_GPL(inet_twsk_alloc); /* These are always called from BH context. See callers in * tcp_input.c to verify this. */ /* This is for handling early-kills of TIME_WAIT sockets. * Warning : consume reference. * Caller should not access tw anymore. */ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); /* inet_twsk_purge() walks over all sockets, including tw ones, * and removes them via inet_twsk_deschedule_put() after a * refcount_inc_not_zero(). * * inet_twsk_hashdance_schedule() must (re)init the refcount before * arming the timer, i.e. inet_twsk_purge can obtain a reference to * a twsk that did not yet schedule the timer. * * The ehash lock synchronizes these two: * After acquiring the lock, the timer is always scheduled (else * timer_shutdown returns false), because hashdance_schedule releases * the ehash lock only after completing the timer initialization. * * Without grabbing the ehash lock, we get: * 1) cpu x sets twsk refcount to 3 * 2) cpu y bumps refcount to 4 * 3) cpu y calls inet_twsk_deschedule_put() and shuts timer down * 4) cpu x tries to start timer, but mod_timer is a noop post-shutdown * -> timer refcount is never decremented. */ spin_lock(lock); /* Makes sure hashdance_schedule() has completed */ spin_unlock(lock); if (timer_shutdown_sync(&tw->tw_timer)) inet_twsk_kill(tw); inet_twsk_put(tw); } EXPORT_SYMBOL(inet_twsk_deschedule_put); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) { /* timeout := RTO * 3.5 * * 3.5 = 1+2+0.5 to wait for two retransmits. * * RATIONALE: if FIN arrived and we entered TIME-WAIT state, * our ACK acking that FIN can be lost. 
If N subsequent retransmitted * FINs (or previous seqments) are lost (probability of such event * is p^(N+1), where p is probability to lose single packet and * time to detect the loss is about RTO*(2^N - 1) with exponential * backoff). Normal timewait length is calculated so, that we * waited at least for one retransmitted FIN (maximal RTO is 120sec). * [ BTW Linux. following BSD, violates this requirement waiting * only for 60sec, we should wait at least for 240 secs. * Well, 240 consumes too much of resources 8) * ] * This interval is not reduced to catch old duplicate and * responces to our wandering segments living for two MSLs. * However, if we use PAWS to detect * old duplicates, we can reduce the interval to bounds required * by RTO, rather than MSL. So, if peer understands PAWS, we * kill tw bucket after 3.5*RTO (it is important that this number * is greater than TS tick!) and detect old duplicates with help * of PAWS. */ if (!rearm) { bool kill = timeo <= 4*HZ; __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED : LINUX_MIB_TIMEWAITED); BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo)); refcount_inc(&tw->tw_dr->tw_refcount); } else { mod_timer_pending(&tw->tw_timer, jiffies + timeo); } } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); /* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo) { struct inet_ehash_bucket *head = &hashinfo->ehash[0]; unsigned int ehash_mask = hashinfo->ehash_mask; struct hlist_nulls_node *node; unsigned int slot; struct sock *sk; for (slot = 0; slot <= ehash_mask; slot++, head++) { if (hlist_nulls_empty(&head->chain)) continue; restart_rcu: cond_resched(); rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { int state = inet_sk_state_load(sk); if ((1 << state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV)) continue; if (refcount_read(&sock_net(sk)->ns.count)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) continue; if (refcount_read(&sock_net(sk)->ns.count)) { sock_gen_put(sk); goto restart; } rcu_read_unlock(); local_bh_disable(); if (state == TCP_TIME_WAIT) { inet_twsk_deschedule_put(inet_twsk(sk)); } else { struct request_sock *req = inet_reqsk(sk); inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); } local_bh_enable(); goto restart_rcu; } /* If the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != slot) goto restart; rcu_read_unlock(); } } EXPORT_SYMBOL_GPL(inet_twsk_purge); |
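/*
 * Sketch of the expected caller pattern, modelled loosely on what the TCP
 * code does when a connection enters TIME_WAIT (simplified; the helper name
 * is hypothetical): allocate the timewait bucket, copy any protocol-private
 * state, then let inet_twsk_hashdance_schedule() publish it and arm the
 * timer.  Per the comments above, 'tw' must not be touched after that call.
 */
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>

static void example_enter_time_wait(struct sock *sk,
				    struct inet_timewait_death_row *dr,
				    struct inet_hashinfo *hashinfo,
				    int state, int timeo)
{
	struct inet_timewait_sock *tw;

	tw = inet_twsk_alloc(sk, dr, state);
	if (!tw) {
		/* over sysctl_max_tw_buckets or out of memory:
		 * the caller has to tear the connection down directly
		 */
		return;
	}

	/* ... copy protocol specific fields from sk into tw here ... */

	/* publishes tw in ehash/bhash, unhashes sk, sets tw_refcnt to 3
	 * and arms the timer; tw must not be dereferenced afterwards
	 */
	inet_twsk_hashdance_schedule(tw, sk, hashinfo, timeo);
}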
// SPDX-License-Identifier: GPL-2.0-only // Copyright (c) 2020 Facebook Inc. #include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/udp_tunnel.h> #include <net/vxlan.h> enum udp_tunnel_nic_table_entry_flags { UDP_TUNNEL_NIC_ENTRY_ADD = BIT(0), UDP_TUNNEL_NIC_ENTRY_DEL = BIT(1), UDP_TUNNEL_NIC_ENTRY_OP_FAIL = BIT(2), UDP_TUNNEL_NIC_ENTRY_FROZEN = BIT(3), }; struct udp_tunnel_nic_table_entry { __be16 port; u8 type; u8 flags; u16 use_cnt; #define UDP_TUNNEL_NIC_USE_CNT_MAX U16_MAX u8 hw_priv; }; /** * struct udp_tunnel_nic - UDP tunnel port offload state * @work: async work for talking to hardware from process context * @dev: netdev pointer * @need_sync: at least one port state changed * @need_replay: space was freed, we need a replay of all ports * @work_pending: @work is currently scheduled * @n_tables: number of tables under @entries * @missed: bitmap of tables which overflowed * @entries: table of tables of ports currently offloaded */ struct udp_tunnel_nic { struct work_struct work; struct net_device *dev; u8 need_sync:1; u8 need_replay:1; u8 work_pending:1; unsigned int n_tables; unsigned long missed; struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables); }; /* We ensure all work structs are done using driver state, but not the code. * We need a workqueue we can flush before module gets removed. 
*/ static struct workqueue_struct *udp_tunnel_nic_workqueue; static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type) { switch (type) { case UDP_TUNNEL_TYPE_VXLAN: return "vxlan"; case UDP_TUNNEL_TYPE_GENEVE: return "geneve"; case UDP_TUNNEL_TYPE_VXLAN_GPE: return "vxlan-gpe"; default: return "unknown"; } } static bool udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry) { return entry->use_cnt == 0 && !entry->flags; } static bool udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry) { return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN); } static bool udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry) { return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN; } static void udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry) { if (!udp_tunnel_nic_entry_is_free(entry)) entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN; } static void udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry) { entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN; } static bool udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry) { return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD | UDP_TUNNEL_NIC_ENTRY_DEL); } static void udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn, struct udp_tunnel_nic_table_entry *entry, unsigned int flag) { entry->flags |= flag; utn->need_sync = 1; } static void udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry, struct udp_tunnel_info *ti) { memset(ti, 0, sizeof(*ti)); ti->port = entry->port; ti->type = entry->type; ti->hw_priv = entry->hw_priv; } static bool udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; unsigned int i, j; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) return false; return true; } static bool udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_table_info *table; unsigned int i, j; if (!utn->missed) return false; for (i = 0; i < utn->n_tables; i++) { table = &dev->udp_tunnel_nic_info->tables[i]; if (!test_bit(i, &utn->missed)) continue; for (j = 0; j < table->n_entries; j++) if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) return true; } return false; } static void __udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { struct udp_tunnel_nic_table_entry *entry; struct udp_tunnel_nic *utn; utn = dev->udp_tunnel_nic; entry = &utn->entries[table][idx]; if (entry->use_cnt) udp_tunnel_nic_ti_from_entry(entry, ti); } static void __udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv; } static void udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry, int err) { bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL); if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && (!err || (err == -EEXIST && dodgy))) entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD; if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL && (!err || (err == -ENOENT && dodgy))) entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL; if (!err) entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL; else entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL; } static void 
udp_tunnel_nic_device_sync_one(struct net_device *dev, struct udp_tunnel_nic *utn, unsigned int table, unsigned int idx) { struct udp_tunnel_nic_table_entry *entry; struct udp_tunnel_info ti; int err; entry = &utn->entries[table][idx]; if (!udp_tunnel_nic_entry_is_queued(entry)) return; udp_tunnel_nic_ti_from_entry(entry, &ti); if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD) err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti); else err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx, &ti); udp_tunnel_nic_entry_update_done(entry, err); if (err) netdev_warn(dev, "UDP tunnel port sync failed port %d type %s: %d\n", be16_to_cpu(entry->port), udp_tunnel_nic_tunnel_type_name(entry->type), err); } static void udp_tunnel_nic_device_sync_by_port(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; unsigned int i, j; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) udp_tunnel_nic_device_sync_one(dev, utn, i, j); } static void udp_tunnel_nic_device_sync_by_table(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; unsigned int i, j; int err; for (i = 0; i < utn->n_tables; i++) { /* Find something that needs sync in this table */ for (j = 0; j < info->tables[i].n_entries; j++) if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j])) break; if (j == info->tables[i].n_entries) continue; err = info->sync_table(dev, i); if (err) netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n", i, err); for (j = 0; j < info->tables[i].n_entries; j++) { struct udp_tunnel_nic_table_entry *entry; entry = &utn->entries[i][j]; if (udp_tunnel_nic_entry_is_queued(entry)) udp_tunnel_nic_entry_update_done(entry, err); } } } static void __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) { if (!utn->need_sync) return; if (dev->udp_tunnel_nic_info->sync_table) udp_tunnel_nic_device_sync_by_table(dev, utn); else udp_tunnel_nic_device_sync_by_port(dev, utn); utn->need_sync = 0; /* Can't replay directly here, in case we come from the tunnel driver's * notification - trying to replay may deadlock inside tunnel driver. */ utn->need_replay = udp_tunnel_nic_should_replay(dev, utn); } static void udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; bool may_sleep; if (!utn->need_sync) return; /* Drivers which sleep in the callback need to update from * the workqueue, if we come from the tunnel driver's notification. 
*/ may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; if (!may_sleep) __udp_tunnel_nic_device_sync(dev, utn); if (may_sleep || utn->need_replay) { queue_work(udp_tunnel_nic_workqueue, &utn->work); utn->work_pending = 1; } } static bool udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table, struct udp_tunnel_info *ti) { return table->tunnel_types & ti->type; } static bool udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn, struct udp_tunnel_info *ti) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; unsigned int i; /* Special case IPv4-only NICs */ if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY && ti->sa_family != AF_INET) return false; for (i = 0; i < utn->n_tables; i++) if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti)) return true; return false; } static int udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn, struct udp_tunnel_info *ti) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic_table_entry *entry; unsigned int i, j; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) { entry = &utn->entries[i][j]; if (!udp_tunnel_nic_entry_is_free(entry) && entry->port == ti->port && entry->type != ti->type) { __set_bit(i, &utn->missed); return true; } } return false; } static void udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, unsigned int table, unsigned int idx, int use_cnt_adj) { struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; unsigned int from, to; WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX); /* If not going from used to unused or vice versa - all done. * For dodgy entries make sure we try to sync again (queue the entry). */ entry->use_cnt += use_cnt_adj; if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj)) return; /* Cancel the op before it was sent to the device, if possible, * otherwise we'd need to take special care to issue commands * in the same order the ports arrived. */ if (use_cnt_adj < 0) { from = UDP_TUNNEL_NIC_ENTRY_ADD; to = UDP_TUNNEL_NIC_ENTRY_DEL; } else { from = UDP_TUNNEL_NIC_ENTRY_DEL; to = UDP_TUNNEL_NIC_ENTRY_ADD; } if (entry->flags & from) { entry->flags &= ~from; if (!dodgy) return; } udp_tunnel_nic_entry_queue(utn, entry, to); } static bool udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti, int use_cnt_adj) { struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; if (udp_tunnel_nic_entry_is_free(entry) || entry->port != ti->port || entry->type != ti->type) return false; if (udp_tunnel_nic_entry_is_frozen(entry)) return true; udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj); return true; } /* Try to find existing matching entry and adjust its use count, instead of * adding a new one. Returns true if entry was found. In case of delete the * entry may have gotten removed in the process, in which case it will be * queued for removal. 
*/ static bool udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn, struct udp_tunnel_info *ti, int use_cnt_adj) { const struct udp_tunnel_nic_table_info *table; unsigned int i, j; for (i = 0; i < utn->n_tables; i++) { table = &dev->udp_tunnel_nic_info->tables[i]; if (!udp_tunnel_nic_table_is_capable(table, ti)) continue; for (j = 0; j < table->n_entries; j++) if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti, use_cnt_adj)) return true; } return false; } static bool udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn, struct udp_tunnel_info *ti) { return udp_tunnel_nic_try_existing(dev, utn, ti, +1); } static bool udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn, struct udp_tunnel_info *ti) { return udp_tunnel_nic_try_existing(dev, utn, ti, -1); } static bool udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn, struct udp_tunnel_info *ti) { const struct udp_tunnel_nic_table_info *table; unsigned int i, j; for (i = 0; i < utn->n_tables; i++) { table = &dev->udp_tunnel_nic_info->tables[i]; if (!udp_tunnel_nic_table_is_capable(table, ti)) continue; for (j = 0; j < table->n_entries; j++) { struct udp_tunnel_nic_table_entry *entry; entry = &utn->entries[i][j]; if (!udp_tunnel_nic_entry_is_free(entry)) continue; entry->port = ti->port; entry->type = ti->type; entry->use_cnt = 1; udp_tunnel_nic_entry_queue(utn, entry, UDP_TUNNEL_NIC_ENTRY_ADD); return true; } /* The different table may still fit this port in, but there * are no devices currently which have multiple tables accepting * the same tunnel type, and false positives are okay. */ __set_bit(i, &utn->missed); } return false; } static void __udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic *utn; utn = dev->udp_tunnel_nic; if (!utn) return; if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY) return; if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN && ti->port == htons(IANA_VXLAN_UDP_PORT)) { if (ti->type != UDP_TUNNEL_TYPE_VXLAN) netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n"); return; } if (!udp_tunnel_nic_is_capable(dev, utn, ti)) return; /* It may happen that a tunnel of one type is removed and different * tunnel type tries to reuse its port before the device was informed. * Rely on utn->missed to re-add this port later. 
*/ if (udp_tunnel_nic_has_collision(dev, utn, ti)) return; if (!udp_tunnel_nic_add_existing(dev, utn, ti)) udp_tunnel_nic_add_new(dev, utn, ti); udp_tunnel_nic_device_sync(dev, utn); } static void __udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) { struct udp_tunnel_nic *utn; utn = dev->udp_tunnel_nic; if (!utn) return; if (!udp_tunnel_nic_is_capable(dev, utn, ti)) return; udp_tunnel_nic_del_existing(dev, utn, ti); udp_tunnel_nic_device_sync(dev, utn); } static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic *utn; unsigned int i, j; ASSERT_RTNL(); utn = dev->udp_tunnel_nic; if (!utn) return; utn->need_sync = false; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) { struct udp_tunnel_nic_table_entry *entry; entry = &utn->entries[i][j]; entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | UDP_TUNNEL_NIC_ENTRY_OP_FAIL); /* We don't release rtnl across ops */ WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); if (!entry->use_cnt) continue; udp_tunnel_nic_entry_queue(utn, entry, UDP_TUNNEL_NIC_ENTRY_ADD); } __udp_tunnel_nic_device_sync(dev, utn); } static size_t __udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic *utn; unsigned int j; size_t size; utn = dev->udp_tunnel_nic; if (!utn) return 0; size = 0; for (j = 0; j < info->tables[table].n_entries; j++) { if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j])) continue; size += nla_total_size(0) + /* _TABLE_ENTRY */ nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */ nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */ } return size; } static int __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic *utn; struct nlattr *nest; unsigned int j; utn = dev->udp_tunnel_nic; if (!utn) return 0; for (j = 0; j < info->tables[table].n_entries; j++) { if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j])) continue; nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); if (!nest) return -EMSGSIZE; if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, utn->entries[table][j].port) || nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, ilog2(utn->entries[table][j].type))) goto err_cancel; nla_nest_end(skb, nest); } return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { .get_port = __udp_tunnel_nic_get_port, .set_port_priv = __udp_tunnel_nic_set_port_priv, .add_port = __udp_tunnel_nic_add_port, .del_port = __udp_tunnel_nic_del_port, .reset_ntf = __udp_tunnel_nic_reset_ntf, .dump_size = __udp_tunnel_nic_dump_size, .dump_write = __udp_tunnel_nic_dump_write, }; static void udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; unsigned int i, j; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) { int adj_cnt = -utn->entries[i][j].use_cnt; if (adj_cnt) udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt); } __udp_tunnel_nic_device_sync(dev, utn); for (i = 0; i < utn->n_tables; i++) memset(utn->entries[i], 0, array_size(info->tables[i].n_entries, sizeof(**utn->entries))); WARN_ON(utn->need_sync); utn->need_replay = 0; } static void udp_tunnel_nic_replay(struct net_device 
*dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic_shared_node *node; unsigned int i, j; /* Freeze all the ports we are already tracking so that the replay * does not double up the refcount. */ for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]); utn->missed = 0; utn->need_replay = 0; if (!info->shared) { udp_tunnel_get_rx_info(dev); } else { list_for_each_entry(node, &info->shared->devices, list) udp_tunnel_get_rx_info(node->dev); } for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]); } static void udp_tunnel_nic_device_sync_work(struct work_struct *work) { struct udp_tunnel_nic *utn = container_of(work, struct udp_tunnel_nic, work); rtnl_lock(); utn->work_pending = 0; __udp_tunnel_nic_device_sync(utn->dev, utn); if (utn->need_replay) udp_tunnel_nic_replay(utn->dev, utn); rtnl_unlock(); } static struct udp_tunnel_nic * udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, unsigned int n_tables) { struct udp_tunnel_nic *utn; unsigned int i; utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL); if (!utn) return NULL; utn->n_tables = n_tables; INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); for (i = 0; i < n_tables; i++) { utn->entries[i] = kcalloc(info->tables[i].n_entries, sizeof(*utn->entries[i]), GFP_KERNEL); if (!utn->entries[i]) goto err_free_prev_entries; } return utn; err_free_prev_entries: while (i--) kfree(utn->entries[i]); kfree(utn); return NULL; } static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) { unsigned int i; for (i = 0; i < utn->n_tables; i++) kfree(utn->entries[i]); kfree(utn); } static int udp_tunnel_nic_register(struct net_device *dev) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; struct udp_tunnel_nic_shared_node *node = NULL; struct udp_tunnel_nic *utn; unsigned int n_tables, i; BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < UDP_TUNNEL_NIC_MAX_TABLES); /* Expect use count of at most 2 (IPv4, IPv6) per device */ BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX < UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2); /* Check that the driver info is sane */ if (WARN_ON(!info->set_port != !info->unset_port) || WARN_ON(!info->set_port == !info->sync_table) || WARN_ON(!info->tables[0].n_entries)) return -EINVAL; if (WARN_ON(info->shared && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) return -EINVAL; n_tables = 1; for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { if (!info->tables[i].n_entries) continue; n_tables++; if (WARN_ON(!info->tables[i - 1].n_entries)) return -EINVAL; } /* Create UDP tunnel state structures */ if (info->shared) { node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; node->dev = dev; } if (info->shared && info->shared->udp_tunnel_nic_info) { utn = info->shared->udp_tunnel_nic_info; } else { utn = udp_tunnel_nic_alloc(info, n_tables); if (!utn) { kfree(node); return -ENOMEM; } } if (info->shared) { if (!info->shared->udp_tunnel_nic_info) { INIT_LIST_HEAD(&info->shared->devices); info->shared->udp_tunnel_nic_info = utn; } list_add_tail(&node->list, &info->shared->devices); } utn->dev = dev; dev_hold(dev); dev->udp_tunnel_nic = utn; if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) udp_tunnel_get_rx_info(dev); return 0; } static void udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = 
dev->udp_tunnel_nic_info; /* For a shared table remove this dev from the list of sharing devices * and if there are other devices just detach. */ if (info->shared) { struct udp_tunnel_nic_shared_node *node, *first; list_for_each_entry(node, &info->shared->devices, list) if (node->dev == dev) break; if (list_entry_is_head(node, &info->shared->devices, list)) return; list_del(&node->list); kfree(node); first = list_first_entry_or_null(&info->shared->devices, typeof(*first), list); if (first) { udp_tunnel_drop_rx_info(dev); utn->dev = first->dev; goto release_dev; } info->shared->udp_tunnel_nic_info = NULL; } /* Flush before we check work, so we don't waste time adding entries * from the work which we will boot immediately. */ udp_tunnel_nic_flush(dev, utn); /* Wait for the work to be done using the state, netdev core will * retry unregister until we give up our reference on this device. */ if (utn->work_pending) return; udp_tunnel_nic_free(utn); release_dev: dev->udp_tunnel_nic = NULL; dev_put(dev); } static int udp_tunnel_nic_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); const struct udp_tunnel_nic_info *info; struct udp_tunnel_nic *utn; info = dev->udp_tunnel_nic_info; if (!info) return NOTIFY_DONE; if (event == NETDEV_REGISTER) { int err; err = udp_tunnel_nic_register(dev); if (err) netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err); return notifier_from_errno(err); } /* All other events will need the udp_tunnel_nic state */ utn = dev->udp_tunnel_nic; if (!utn) return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { udp_tunnel_nic_unregister(dev, utn); return NOTIFY_OK; } /* All other events only matter if NIC has to be programmed open */ if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) return NOTIFY_DONE; if (event == NETDEV_UP) { WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); udp_tunnel_get_rx_info(dev); return NOTIFY_OK; } if (event == NETDEV_GOING_DOWN) { udp_tunnel_nic_flush(dev, utn); return NOTIFY_OK; } return NOTIFY_DONE; } static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = { .notifier_call = udp_tunnel_nic_netdevice_event, }; static int __init udp_tunnel_nic_init_module(void) { int err; udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0); if (!udp_tunnel_nic_workqueue) return -ENOMEM; rtnl_lock(); udp_tunnel_nic_ops = &__udp_tunnel_nic_ops; rtnl_unlock(); err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block); if (err) goto err_unset_ops; return 0; err_unset_ops: rtnl_lock(); udp_tunnel_nic_ops = NULL; rtnl_unlock(); destroy_workqueue(udp_tunnel_nic_workqueue); return err; } late_initcall(udp_tunnel_nic_init_module); static void __exit udp_tunnel_nic_cleanup_module(void) { unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block); rtnl_lock(); udp_tunnel_nic_ops = NULL; rtnl_unlock(); destroy_workqueue(udp_tunnel_nic_workqueue); } module_exit(udp_tunnel_nic_cleanup_module); MODULE_LICENSE("GPL"); |
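/*
 * Illustrative sketch (not part of the file above): how a NIC driver is
 * expected to hook into the udp_tunnel_nic infrastructure. The driver
 * describes its port tables and callbacks in a struct udp_tunnel_nic_info
 * and points dev->udp_tunnel_nic_info at it before registering the netdev;
 * the core code above then calls set_port()/unset_port() (or sync_table())
 * as tunnel sockets come and go. All "foo_" names below are hypothetical.
 */
static int foo_udp_tunnel_set_port(struct net_device *dev, unsigned int table,
				   unsigned int entry, struct udp_tunnel_info *ti)
{
	/* program ti->port / ti->type into hardware filter slot 'entry' */
	return 0;
}

static int foo_udp_tunnel_unset_port(struct net_device *dev, unsigned int table,
				     unsigned int entry, struct udp_tunnel_info *ti)
{
	/* clear hardware filter slot 'entry' */
	return 0;
}

static const struct udp_tunnel_nic_info foo_udp_tunnels = {
	.set_port	= foo_udp_tunnel_set_port,
	.unset_port	= foo_udp_tunnel_unset_port,
	.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
	.tables		= {
		{
			.n_entries	= 4,
			.tunnel_types	= UDP_TUNNEL_TYPE_VXLAN |
					  UDP_TUNNEL_TYPE_GENEVE,
		},
	},
};

/* in the driver's probe path: netdev->udp_tunnel_nic_info = &foo_udp_tunnels; */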
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2008 ioogle, Inc. All rights reserved. * * Libata transport class.
* * The ATA transport class contains common code to deal with ATA HBAs, * an approximated representation of ATA topologies in the driver model, * and various sysfs attributes to expose these topologies and management * interfaces to user-space. * * There are 3 objects defined in this class: * - ata_port * - ata_link * - ata_device * Each port has a link object. Each link can have up to two devices for PATA * and generally one for SATA. * If there is SATA port multiplier [PMP], 15 additional ata_link object are * created. * * These objects are created when the ata host is initialized and when a PMP is * found. They are removed only when the HBA is removed, cleaned before the * error handler runs. */ #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <scsi/scsi_transport.h> #include <linux/libata.h> #include <linux/hdreg.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> #include "libata.h" #include "libata-transport.h" #define ATA_PORT_ATTRS 3 #define ATA_LINK_ATTRS 3 #define ATA_DEV_ATTRS 9 struct scsi_transport_template; struct scsi_transport_template *ata_scsi_transport_template; struct ata_internal { struct scsi_transport_template t; struct device_attribute private_port_attrs[ATA_PORT_ATTRS]; struct device_attribute private_link_attrs[ATA_LINK_ATTRS]; struct device_attribute private_dev_attrs[ATA_DEV_ATTRS]; struct transport_container link_attr_cont; struct transport_container dev_attr_cont; /* * The array of null terminated pointers to attributes * needed by scsi_sysfs.c */ struct device_attribute *link_attrs[ATA_LINK_ATTRS + 1]; struct device_attribute *port_attrs[ATA_PORT_ATTRS + 1]; struct device_attribute *dev_attrs[ATA_DEV_ATTRS + 1]; }; #define to_ata_internal(tmpl) container_of(tmpl, struct ata_internal, t) #define tdev_to_device(d) \ container_of((d), struct ata_device, tdev) #define transport_class_to_dev(dev) \ tdev_to_device((dev)->parent) #define tdev_to_link(d) \ container_of((d), struct ata_link, tdev) #define transport_class_to_link(dev) \ tdev_to_link((dev)->parent) #define tdev_to_port(d) \ container_of((d), struct ata_port, tdev) #define transport_class_to_port(dev) \ tdev_to_port((dev)->parent) /* * Hack to allow attributes of the same name in different objects. 
*/ #define ATA_DEVICE_ATTR(_prefix,_name,_mode,_show,_store) \ struct device_attribute device_attr_##_prefix##_##_name = \ __ATTR(_name,_mode,_show,_store) #define ata_bitfield_name_match(title, table) \ static ssize_t \ get_ata_##title##_names(u32 table_key, char *buf) \ { \ char *prefix = ""; \ ssize_t len = 0; \ int i; \ \ for (i = 0; i < ARRAY_SIZE(table); i++) { \ if (table[i].value & table_key) { \ len += sprintf(buf + len, "%s%s", \ prefix, table[i].name); \ prefix = ", "; \ } \ } \ len += sprintf(buf + len, "\n"); \ return len; \ } #define ata_bitfield_name_search(title, table) \ static ssize_t \ get_ata_##title##_names(u32 table_key, char *buf) \ { \ ssize_t len = 0; \ int i; \ \ for (i = 0; i < ARRAY_SIZE(table); i++) { \ if (table[i].value == table_key) { \ len += sprintf(buf + len, "%s", \ table[i].name); \ break; \ } \ } \ len += sprintf(buf + len, "\n"); \ return len; \ } static struct { u32 value; char *name; } ata_class_names[] = { { ATA_DEV_UNKNOWN, "unknown" }, { ATA_DEV_ATA, "ata" }, { ATA_DEV_ATA_UNSUP, "ata" }, { ATA_DEV_ATAPI, "atapi" }, { ATA_DEV_ATAPI_UNSUP, "atapi" }, { ATA_DEV_PMP, "pmp" }, { ATA_DEV_PMP_UNSUP, "pmp" }, { ATA_DEV_SEMB, "semb" }, { ATA_DEV_SEMB_UNSUP, "semb" }, { ATA_DEV_ZAC, "zac" }, { ATA_DEV_NONE, "none" } }; ata_bitfield_name_search(class, ata_class_names) static struct { u32 value; char *name; } ata_err_names[] = { { AC_ERR_DEV, "DeviceError" }, { AC_ERR_HSM, "HostStateMachineError" }, { AC_ERR_TIMEOUT, "Timeout" }, { AC_ERR_MEDIA, "MediaError" }, { AC_ERR_ATA_BUS, "BusError" }, { AC_ERR_HOST_BUS, "HostBusError" }, { AC_ERR_SYSTEM, "SystemError" }, { AC_ERR_INVALID, "InvalidArg" }, { AC_ERR_OTHER, "Unknown" }, { AC_ERR_NODEV_HINT, "NoDeviceHint" }, { AC_ERR_NCQ, "NCQError" } }; ata_bitfield_name_match(err, ata_err_names) static struct { u32 value; char *name; } ata_xfer_names[] = { { XFER_UDMA_7, "XFER_UDMA_7" }, { XFER_UDMA_6, "XFER_UDMA_6" }, { XFER_UDMA_5, "XFER_UDMA_5" }, { XFER_UDMA_4, "XFER_UDMA_4" }, { XFER_UDMA_3, "XFER_UDMA_3" }, { XFER_UDMA_2, "XFER_UDMA_2" }, { XFER_UDMA_1, "XFER_UDMA_1" }, { XFER_UDMA_0, "XFER_UDMA_0" }, { XFER_MW_DMA_4, "XFER_MW_DMA_4" }, { XFER_MW_DMA_3, "XFER_MW_DMA_3" }, { XFER_MW_DMA_2, "XFER_MW_DMA_2" }, { XFER_MW_DMA_1, "XFER_MW_DMA_1" }, { XFER_MW_DMA_0, "XFER_MW_DMA_0" }, { XFER_SW_DMA_2, "XFER_SW_DMA_2" }, { XFER_SW_DMA_1, "XFER_SW_DMA_1" }, { XFER_SW_DMA_0, "XFER_SW_DMA_0" }, { XFER_PIO_6, "XFER_PIO_6" }, { XFER_PIO_5, "XFER_PIO_5" }, { XFER_PIO_4, "XFER_PIO_4" }, { XFER_PIO_3, "XFER_PIO_3" }, { XFER_PIO_2, "XFER_PIO_2" }, { XFER_PIO_1, "XFER_PIO_1" }, { XFER_PIO_0, "XFER_PIO_0" }, { XFER_PIO_SLOW, "XFER_PIO_SLOW" } }; ata_bitfield_name_search(xfer, ata_xfer_names) /* * ATA Port attributes */ #define ata_port_show_simple(field, name, format_string, cast) \ static ssize_t \ show_ata_port_##name(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_port *ap = transport_class_to_port(dev); \ \ return scnprintf(buf, 20, format_string, cast ap->field); \ } #define ata_port_simple_attr(field, name, format_string, type) \ ata_port_show_simple(field, name, format_string, (type)) \ static DEVICE_ATTR(name, S_IRUGO, show_ata_port_##name, NULL) ata_port_simple_attr(nr_pmp_links, nr_pmp_links, "%d\n", int); ata_port_simple_attr(stats.idle_irq, idle_irq, "%ld\n", unsigned long); /* We want the port_no sysfs attibute to start at 1 (ap->port_no starts at 0) */ ata_port_simple_attr(port_no + 1, port_no, "%u\n", unsigned int); static DECLARE_TRANSPORT_CLASS(ata_port_class, "ata_port", NULL, 
NULL, NULL); static void ata_tport_release(struct device *dev) { struct ata_port *ap = tdev_to_port(dev); ata_host_put(ap->host); } /** * ata_is_port -- check if a struct device represents a ATA port * @dev: device to check * * Returns: * %1 if the device represents a ATA Port, %0 else */ static int ata_is_port(const struct device *dev) { return dev->release == ata_tport_release; } static int ata_tport_match(struct attribute_container *cont, struct device *dev) { if (!ata_is_port(dev)) return 0; return &ata_scsi_transport_template->host_attrs.ac == cont; } /** * ata_tport_delete -- remove ATA PORT * @ap: ATA PORT to remove * * Removes the specified ATA PORT. Remove the associated link as well. */ void ata_tport_delete(struct ata_port *ap) { struct device *dev = &ap->tdev; ata_tlink_delete(&ap->link); transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); put_device(dev); } EXPORT_SYMBOL_GPL(ata_tport_delete); static const struct device_type ata_port_sas_type = { .name = ATA_PORT_TYPE_NAME, }; /** ata_tport_add - initialize a transport ATA port structure * * @parent: parent device * @ap: existing ata_port structure * * Initialize a ATA port structure for sysfs. It will be added to the device * tree below the device specified by @parent which could be a PCI device. * * Returns %0 on success */ int ata_tport_add(struct device *parent, struct ata_port *ap) { int error; struct device *dev = &ap->tdev; device_initialize(dev); if (ap->flags & ATA_FLAG_SAS_HOST) dev->type = &ata_port_sas_type; else dev->type = &ata_port_type; dev->parent = parent; ata_host_get(ap->host); dev->release = ata_tport_release; dev_set_name(dev, "ata%d", ap->print_id); transport_setup_device(dev); ata_acpi_bind_port(ap); error = device_add(dev); if (error) { goto tport_err; } device_enable_async_suspend(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); pm_runtime_forbid(dev); error = transport_add_device(dev); if (error) goto tport_transport_add_err; transport_configure_device(dev); error = ata_tlink_add(&ap->link); if (error) { goto tport_link_err; } return 0; tport_link_err: transport_remove_device(dev); tport_transport_add_err: device_del(dev); tport_err: transport_destroy_device(dev); put_device(dev); return error; } EXPORT_SYMBOL_GPL(ata_tport_add); /** * ata_port_classify - determine device type based on ATA-spec signature * @ap: ATA port device on which the classification should be run * @tf: ATA taskfile register set for device to be identified * * A wrapper around ata_dev_classify() to provide additional logging * * RETURNS: * Device type, %ATA_DEV_ATA, %ATA_DEV_ATAPI, %ATA_DEV_PMP, * %ATA_DEV_ZAC, or %ATA_DEV_UNKNOWN the event of failure. 
*/ unsigned int ata_port_classify(struct ata_port *ap, const struct ata_taskfile *tf) { int i; unsigned int class = ata_dev_classify(tf); /* Start with index '1' to skip the 'unknown' entry */ for (i = 1; i < ARRAY_SIZE(ata_class_names); i++) { if (ata_class_names[i].value == class) { ata_port_dbg(ap, "found %s device by sig\n", ata_class_names[i].name); return class; } } ata_port_info(ap, "found unknown device (class %u)\n", class); return class; } EXPORT_SYMBOL_GPL(ata_port_classify); /* * ATA device attributes */ #define ata_dev_show_class(title, field) \ static ssize_t \ show_ata_dev_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_device *ata_dev = transport_class_to_dev(dev); \ \ return get_ata_##title##_names(ata_dev->field, buf); \ } #define ata_dev_attr(title, field) \ ata_dev_show_class(title, field) \ static DEVICE_ATTR(field, S_IRUGO, show_ata_dev_##field, NULL) ata_dev_attr(class, class); ata_dev_attr(xfer, pio_mode); ata_dev_attr(xfer, dma_mode); ata_dev_attr(xfer, xfer_mode); #define ata_dev_show_simple(field, format_string, cast) \ static ssize_t \ show_ata_dev_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_device *ata_dev = transport_class_to_dev(dev); \ \ return scnprintf(buf, 20, format_string, cast ata_dev->field); \ } #define ata_dev_simple_attr(field, format_string, type) \ ata_dev_show_simple(field, format_string, (type)) \ static DEVICE_ATTR(field, S_IRUGO, \ show_ata_dev_##field, NULL) ata_dev_simple_attr(spdn_cnt, "%d\n", int); struct ata_show_ering_arg { char* buf; int written; }; static int ata_show_ering(struct ata_ering_entry *ent, void *void_arg) { struct ata_show_ering_arg* arg = void_arg; u64 seconds; u32 rem; seconds = div_u64_rem(ent->timestamp, HZ, &rem); arg->written += sprintf(arg->buf + arg->written, "[%5llu.%09lu]", seconds, rem * NSEC_PER_SEC / HZ); arg->written += get_ata_err_names(ent->err_mask, arg->buf + arg->written); return 0; } static ssize_t show_ata_dev_ering(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); struct ata_show_ering_arg arg = { buf, 0 }; ata_ering_map(&ata_dev->ering, ata_show_ering, &arg); return arg.written; } static DEVICE_ATTR(ering, S_IRUGO, show_ata_dev_ering, NULL); static ssize_t show_ata_dev_id(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); int written = 0, i = 0; if (ata_dev->class == ATA_DEV_PMP) return 0; for(i=0;i<ATA_ID_WORDS;i++) { written += scnprintf(buf+written, 20, "%04x%c", ata_dev->id[i], ((i+1) & 7) ? ' ' : '\n'); } return written; } static DEVICE_ATTR(id, S_IRUGO, show_ata_dev_id, NULL); static ssize_t show_ata_dev_gscr(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); int written = 0, i = 0; if (ata_dev->class != ATA_DEV_PMP) return 0; for(i=0;i<SATA_PMP_GSCR_DWORDS;i++) { written += scnprintf(buf+written, 20, "%08x%c", ata_dev->gscr[i], ((i+1) & 3) ? 
' ' : '\n'); } if (SATA_PMP_GSCR_DWORDS & 3) buf[written-1] = '\n'; return written; } static DEVICE_ATTR(gscr, S_IRUGO, show_ata_dev_gscr, NULL); static ssize_t show_ata_dev_trim(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); unsigned char *mode; if (!ata_id_has_trim(ata_dev->id)) mode = "unsupported"; else if (ata_dev->quirks & ATA_QUIRK_NOTRIM) mode = "forced_unsupported"; else if (ata_dev->quirks & ATA_QUIRK_NO_NCQ_TRIM) mode = "forced_unqueued"; else if (ata_fpdma_dsm_supported(ata_dev)) mode = "queued"; else mode = "unqueued"; return scnprintf(buf, 20, "%s\n", mode); } static DEVICE_ATTR(trim, S_IRUGO, show_ata_dev_trim, NULL); static DECLARE_TRANSPORT_CLASS(ata_dev_class, "ata_device", NULL, NULL, NULL); static void ata_tdev_release(struct device *dev) { } /** * ata_is_ata_dev -- check if a struct device represents a ATA device * @dev: device to check * * Returns: * true if the device represents a ATA device, false otherwise */ static bool ata_is_ata_dev(const struct device *dev) { return dev->release == ata_tdev_release; } static int ata_tdev_match(struct attribute_container *cont, struct device *dev) { struct ata_internal *i = to_ata_internal(ata_scsi_transport_template); if (!ata_is_ata_dev(dev)) return 0; return &i->dev_attr_cont.ac == cont; } /** * ata_tdev_free -- free an ATA transport device * @dev: struct ata_device owning the transport device to free * * Free the ATA transport device for the specified ATA device. * * Note: * This function must only be called for a ATA transport device that has not * yet successfully been added using ata_tdev_add(). */ static void ata_tdev_free(struct ata_device *dev) { transport_destroy_device(&dev->tdev); put_device(&dev->tdev); } /** * ata_tdev_delete -- remove an ATA transport device * @ata_dev: struct ata_device owning the transport device to delete * * Removes the ATA transport device for the specified ATA device. */ static void ata_tdev_delete(struct ata_device *ata_dev) { struct device *dev = &ata_dev->tdev; transport_remove_device(dev); device_del(dev); ata_tdev_free(ata_dev); } /** * ata_tdev_add -- initialize an ATA transport device * @ata_dev: struct ata_device owning the transport device to add * * Initialize an ATA transport device for sysfs. It will be added in the * device tree below the ATA link device it belongs to. * * Returns %0 on success and a negative error code on error. 
*/ static int ata_tdev_add(struct ata_device *ata_dev) { struct device *dev = &ata_dev->tdev; struct ata_link *link = ata_dev->link; struct ata_port *ap = link->ap; int error; device_initialize(dev); dev->parent = &link->tdev; dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); else dev_set_name(dev, "dev%d.%d.0", ap->print_id, link->pmp); transport_setup_device(dev); ata_acpi_bind_dev(ata_dev); error = device_add(dev); if (error) { ata_tdev_free(ata_dev); return error; } error = transport_add_device(dev); if (error) { device_del(dev); ata_tdev_free(ata_dev); return error; } transport_configure_device(dev); return 0; } /* * ATA link attributes */ static int noop(int x) { return x; } #define ata_link_show_linkspeed(field, format) \ static ssize_t \ show_ata_link_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_link *link = transport_class_to_link(dev); \ \ return sprintf(buf, "%s\n", \ sata_spd_string(format(link->field))); \ } #define ata_link_linkspeed_attr(field, format) \ ata_link_show_linkspeed(field, format) \ static DEVICE_ATTR(field, 0444, show_ata_link_##field, NULL) ata_link_linkspeed_attr(hw_sata_spd_limit, fls); ata_link_linkspeed_attr(sata_spd_limit, fls); ata_link_linkspeed_attr(sata_spd, noop); static DECLARE_TRANSPORT_CLASS(ata_link_class, "ata_link", NULL, NULL, NULL); static void ata_tlink_release(struct device *dev) { } /** * ata_is_link -- check if a struct device represents a ATA link * @dev: device to check * * Returns: * true if the device represents a ATA link, false otherwise */ static bool ata_is_link(const struct device *dev) { return dev->release == ata_tlink_release; } static int ata_tlink_match(struct attribute_container *cont, struct device *dev) { struct ata_internal *i = to_ata_internal(ata_scsi_transport_template); if (!ata_is_link(dev)) return 0; return &i->link_attr_cont.ac == cont; } /** * ata_tlink_delete -- remove an ATA link transport device * @link: struct ata_link owning the link transport device to remove * * Removes the link transport device of the specified ATA link. This also * removes the ATA device(s) associated with the link as well. */ void ata_tlink_delete(struct ata_link *link) { struct device *dev = &link->tdev; struct ata_device *ata_dev; ata_for_each_dev(ata_dev, link, ALL) { ata_tdev_delete(ata_dev); } transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); put_device(dev); } /** * ata_tlink_add -- initialize an ATA link transport device * @link: struct ata_link owning the link transport device to initialize * * Initialize an ATA link transport device for sysfs. It will be added in the * device tree below the ATA port it belongs to. * * Returns %0 on success and a negative error code on error. 
*/ int ata_tlink_add(struct ata_link *link) { struct device *dev = &link->tdev; struct ata_port *ap = link->ap; struct ata_device *ata_dev; int error; device_initialize(dev); dev->parent = &ap->tdev; dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); else dev_set_name(dev, "link%d.%d", ap->print_id, link->pmp); transport_setup_device(dev); error = device_add(dev); if (error) goto tlink_err; error = transport_add_device(dev); if (error) goto tlink_transport_err; transport_configure_device(dev); ata_for_each_dev(ata_dev, link, ALL) { error = ata_tdev_add(ata_dev); if (error) goto tlink_dev_err; } return 0; tlink_dev_err: while (--ata_dev >= link->device) ata_tdev_delete(ata_dev); transport_remove_device(dev); tlink_transport_err: device_del(dev); tlink_err: transport_destroy_device(dev); put_device(dev); return error; } /* * Setup / Teardown code */ #define SETUP_TEMPLATE(attrb, field, perm, test) \ i->private_##attrb[count] = dev_attr_##field; \ i->private_##attrb[count].attr.mode = perm; \ i->attrb[count] = &i->private_##attrb[count]; \ if (test) \ count++ #define SETUP_LINK_ATTRIBUTE(field) \ SETUP_TEMPLATE(link_attrs, field, S_IRUGO, 1) #define SETUP_PORT_ATTRIBUTE(field) \ SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1) #define SETUP_DEV_ATTRIBUTE(field) \ SETUP_TEMPLATE(dev_attrs, field, S_IRUGO, 1) /** * ata_attach_transport -- instantiate ATA transport template */ struct scsi_transport_template *ata_attach_transport(void) { struct ata_internal *i; int count; i = kzalloc(sizeof(struct ata_internal), GFP_KERNEL); if (!i) return NULL; i->t.eh_strategy_handler = ata_scsi_error; i->t.user_scan = ata_scsi_user_scan; i->t.host_attrs.ac.attrs = &i->port_attrs[0]; i->t.host_attrs.ac.class = &ata_port_class.class; i->t.host_attrs.ac.match = ata_tport_match; transport_container_register(&i->t.host_attrs); i->link_attr_cont.ac.class = &ata_link_class.class; i->link_attr_cont.ac.attrs = &i->link_attrs[0]; i->link_attr_cont.ac.match = ata_tlink_match; transport_container_register(&i->link_attr_cont); i->dev_attr_cont.ac.class = &ata_dev_class.class; i->dev_attr_cont.ac.attrs = &i->dev_attrs[0]; i->dev_attr_cont.ac.match = ata_tdev_match; transport_container_register(&i->dev_attr_cont); count = 0; SETUP_PORT_ATTRIBUTE(nr_pmp_links); SETUP_PORT_ATTRIBUTE(idle_irq); SETUP_PORT_ATTRIBUTE(port_no); BUG_ON(count > ATA_PORT_ATTRS); i->port_attrs[count] = NULL; count = 0; SETUP_LINK_ATTRIBUTE(hw_sata_spd_limit); SETUP_LINK_ATTRIBUTE(sata_spd_limit); SETUP_LINK_ATTRIBUTE(sata_spd); BUG_ON(count > ATA_LINK_ATTRS); i->link_attrs[count] = NULL; count = 0; SETUP_DEV_ATTRIBUTE(class); SETUP_DEV_ATTRIBUTE(pio_mode); SETUP_DEV_ATTRIBUTE(dma_mode); SETUP_DEV_ATTRIBUTE(xfer_mode); SETUP_DEV_ATTRIBUTE(spdn_cnt); SETUP_DEV_ATTRIBUTE(ering); SETUP_DEV_ATTRIBUTE(id); SETUP_DEV_ATTRIBUTE(gscr); SETUP_DEV_ATTRIBUTE(trim); BUG_ON(count > ATA_DEV_ATTRS); i->dev_attrs[count] = NULL; return &i->t; } /** * ata_release_transport -- release ATA transport template instance * @t: transport template instance */ void ata_release_transport(struct scsi_transport_template *t) { struct ata_internal *i = to_ata_internal(t); transport_container_unregister(&i->t.host_attrs); transport_container_unregister(&i->link_attr_cont); transport_container_unregister(&i->dev_attr_cont); kfree(i); } __init int libata_transport_init(void) { int error; error = transport_class_register(&ata_link_class); if (error) goto out_unregister_transport; error = transport_class_register(&ata_port_class); if 
(error) goto out_unregister_link; error = transport_class_register(&ata_dev_class); if (error) goto out_unregister_port; return 0; out_unregister_port: transport_class_unregister(&ata_port_class); out_unregister_link: transport_class_unregister(&ata_link_class); out_unregister_transport: return error; } void __exit libata_transport_exit(void) { transport_class_unregister(&ata_link_class); transport_class_unregister(&ata_port_class); transport_class_unregister(&ata_dev_class); }
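/*
 * Illustrative expansion (not part of the file above): what the
 * ata_dev_simple_attr(spdn_cnt, "%d\n", int) invocation earlier in this
 * file roughly produces once the ata_dev_show_simple()/DEVICE_ATTR macros
 * are expanded; written out by hand here only to make the sysfs attribute
 * pattern easier to follow.
 */
static ssize_t show_ata_dev_spdn_cnt(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct ata_device *ata_dev = transport_class_to_dev(dev);

	return scnprintf(buf, 20, "%d\n", (int)ata_dev->spdn_cnt);
}
static DEVICE_ATTR(spdn_cnt, S_IRUGO, show_ata_dev_spdn_cnt, NULL);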
// SPDX-License-Identifier: GPL-2.0-only /* * VMware vSockets Driver * * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. */ /* Implementation notes: * * - There are two kinds of sockets: those created by user action (such as * calling socket(2)) and those created by incoming connection request packets. * * - There are two "global" tables, one for bound sockets (sockets that have * specified an address that they are responsible for) and one for connected * sockets (sockets that have established a connection with another socket). * These tables are "global" in that all sockets on the system are placed * within them. - Note, though, that the bound table contains an extra entry * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in * that list. The bound table is used solely for lookup of sockets when packets * are received and that's not necessary for SOCK_DGRAM sockets since we create * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM * sockets out of the bound hash buckets will reduce the chance of collisions * when looking for SOCK_STREAM sockets and prevents us from having to check the * socket type in the hash table lookups. * * - Sockets created by user action will either be "client" sockets that * initiate a connection or "server" sockets that listen for connections; we do * not support simultaneous connects (two "client" sockets connecting). * * - "Server" sockets are referred to as listener sockets throughout this * implementation because they are in the TCP_LISTEN state.
When a * connection request is received (the second kind of socket mentioned above), * we create a new socket and refer to it as a pending socket. These pending * sockets are placed on the pending connection list of the listener socket. * When future packets are received for the address the listener socket is * bound to, we check if the source of the packet is from one that has an * existing pending connection. If it does, we process the packet for the * pending socket. When that socket reaches the connected state, it is removed * from the listener socket's pending list and enqueued in the listener * socket's accept queue. Callers of accept(2) will accept connected sockets * from the listener socket's accept queue. If the socket cannot be accepted * for some reason then it is marked rejected. Once the connection is * accepted, it is owned by the user process and the responsibility for cleanup * falls with that user process. * * - It is possible that these pending sockets will never reach the connected * state; in fact, we may never receive another packet after the connection * request. Because of this, we must schedule a cleanup function to run in the * future, after some amount of time passes where a connection should have been * established. This function ensures that the socket is off all lists so it * cannot be retrieved, then drops all references to the socket so it is cleaned * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this * function will also cleanup rejected sockets, those that reach the connected * state but leave it before they have been accepted. * * - Lock ordering for pending or accept queue sockets is: * * lock_sock(listener); * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); * * Using explicit nested locking keeps lockdep happy since normally only one * lock of a given class may be taken at a time. * * - Sockets created by user action will be cleaned up when the user process * calls close(2), causing our release implementation to be called. Our release * implementation will perform some cleanup then drop the last reference so our * sk_destruct implementation is invoked. Our sk_destruct implementation will * perform additional cleanup that's common for both types of sockets. * * - A socket's reference count is what ensures that the structure won't be * freed. Each entry in a list (such as the "global" bound and connected tables * and the listener socket's pending list and connected queue) ensures a * reference. When we defer work until process context and pass a socket as our * argument, we must ensure the reference count is increased to ensure the * socket isn't freed before the function is run; the deferred function will * then drop the reference. 
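 *
 *   A simplified, illustrative sketch of that pattern (as used further below
 *   by vsock_connect() together with vsock_connect_timeout(); the refcount
 *   balancing on reschedule is omitted here):
 *
 *     sock_hold(sk);
 *     mod_delayed_work(system_wq, &vsk->connect_work, timeout);
 *     ...
 *     static void vsock_connect_timeout(struct work_struct *work)
 *     {
 *             struct vsock_sock *vsk = container_of(work, struct vsock_sock,
 *                                                   connect_work.work);
 *             ...
 *             sock_put(sk_vsock(vsk));
 *     }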
* * - sk->sk_state uses the TCP state constants because they are widely used by * other address families and exposed to userspace tools like ss(8): * * TCP_CLOSE - unconnected * TCP_SYN_SENT - connecting * TCP_ESTABLISHED - connected * TCP_CLOSING - disconnecting * TCP_LISTEN - listening */ #include <linux/compat.h> #include <linux/types.h> #include <linux/bitops.h> #include <linux/cred.h> #include <linux/errqueue.h> #include <linux/init.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/net.h> #include <linux/poll.h> #include <linux/random.h> #include <linux/skbuff.h> #include <linux/smp.h> #include <linux/socket.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/wait.h> #include <linux/workqueue.h> #include <net/sock.h> #include <net/af_vsock.h> #include <uapi/linux/vm_sockets.h> #include <uapi/asm-generic/ioctls.h> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); /* Protocol family. */ struct proto vsock_proto = { .name = "AF_VSOCK", .owner = THIS_MODULE, .obj_size = sizeof(struct vsock_sock), #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = vsock_bpf_update_proto, #endif }; /* The default peer timeout indicates how long we will wait for a peer response * to a control message. */ #define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) #define VSOCK_DEFAULT_BUFFER_SIZE (1024 * 256) #define VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256) #define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128 /* Transport used for host->guest communication */ static const struct vsock_transport *transport_h2g; /* Transport used for guest->host communication */ static const struct vsock_transport *transport_g2h; /* Transport used for DGRAM communication */ static const struct vsock_transport *transport_dgram; /* Transport used for local communication */ static const struct vsock_transport *transport_local; static DEFINE_MUTEX(vsock_register_mutex); /**** UTILS ****/ /* Each bound VSocket is stored in the bind hash table and each connected * VSocket is stored in the connected hash table. * * Unbound sockets are all put on the same list attached to the end of the hash * table (vsock_unbound_sockets). Bound sockets are added to the hash table in * the bucket that their local address hashes to (vsock_bound_sockets(addr) * represents the list that addr hashes to). * * Specifically, we initialize the vsock_bind_table array to a size of * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function * mods with VSOCK_HASH_SIZE to ensure this. */ #define MAX_PORT_RETRIES 24 #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) /* XXX This can probably be implemented in a better way. 
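 *
 * Worked example (assuming VSOCK_HASH_SIZE is 251, its value in
 * include/net/af_vsock.h at the time of writing): a socket bound to port
 * 5000 lands in bucket 5000 % 251 = 231, while a connection from source
 * CID 3 to destination port 1024 is looked up via VSOCK_CONN_HASH() below
 * in bucket (3 ^ 1024) % 251 = 1027 % 251 = 23.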
*/ #define VSOCK_CONN_HASH(src, dst) \ (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE) #define vsock_connected_sockets(src, dst) \ (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) #define vsock_connected_sockets_vsk(vsk) \ vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; EXPORT_SYMBOL_GPL(vsock_bind_table); struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; EXPORT_SYMBOL_GPL(vsock_connected_table); DEFINE_SPINLOCK(vsock_table_lock); EXPORT_SYMBOL_GPL(vsock_table_lock); /* Autobind this socket to the local address if necessary. */ static int vsock_auto_bind(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); struct sockaddr_vm local_addr; if (vsock_addr_bound(&vsk->local_addr)) return 0; vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); return __vsock_bind(sk, &local_addr); } static void vsock_init_tables(void) { int i; for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) INIT_LIST_HEAD(&vsock_bind_table[i]); for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) INIT_LIST_HEAD(&vsock_connected_table[i]); } static void __vsock_insert_bound(struct list_head *list, struct vsock_sock *vsk) { sock_hold(&vsk->sk); list_add(&vsk->bound_table, list); } static void __vsock_insert_connected(struct list_head *list, struct vsock_sock *vsk) { sock_hold(&vsk->sk); list_add(&vsk->connected_table, list); } static void __vsock_remove_bound(struct vsock_sock *vsk) { list_del_init(&vsk->bound_table); sock_put(&vsk->sk); } static void __vsock_remove_connected(struct vsock_sock *vsk) { list_del_init(&vsk->connected_table); sock_put(&vsk->sk); } static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) { struct vsock_sock *vsk; list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) { if (vsock_addr_equals_addr(addr, &vsk->local_addr)) return sk_vsock(vsk); if (addr->svm_port == vsk->local_addr.svm_port && (vsk->local_addr.svm_cid == VMADDR_CID_ANY || addr->svm_cid == VMADDR_CID_ANY)) return sk_vsock(vsk); } return NULL; } static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { struct vsock_sock *vsk; list_for_each_entry(vsk, vsock_connected_sockets(src, dst), connected_table) { if (vsock_addr_equals_addr(src, &vsk->remote_addr) && dst->svm_port == vsk->local_addr.svm_port) { return sk_vsock(vsk); } } return NULL; } static void vsock_insert_unbound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); __vsock_insert_bound(vsock_unbound_sockets, vsk); spin_unlock_bh(&vsock_table_lock); } void vsock_insert_connected(struct vsock_sock *vsk) { struct list_head *list = vsock_connected_sockets( &vsk->remote_addr, &vsk->local_addr); spin_lock_bh(&vsock_table_lock); __vsock_insert_connected(list, vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_insert_connected); void vsock_remove_bound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); if (__vsock_in_bound_table(vsk)) __vsock_remove_bound(vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_remove_bound); void vsock_remove_connected(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); if (__vsock_in_connected_table(vsk)) __vsock_remove_connected(vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_remove_connected); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) { struct sock *sk; spin_lock_bh(&vsock_table_lock); sk = __vsock_find_bound_socket(addr); if (sk) sock_hold(sk); spin_unlock_bh(&vsock_table_lock); return 
sk; } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { struct sock *sk; spin_lock_bh(&vsock_table_lock); sk = __vsock_find_connected_socket(src, dst); if (sk) sock_hold(sk); spin_unlock_bh(&vsock_table_lock); return sk; } EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { vsock_remove_bound(vsk); vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); void vsock_for_each_connected_socket(struct vsock_transport *transport, void (*fn)(struct sock *sk)) { int i; spin_lock_bh(&vsock_table_lock); for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], connected_table) { if (vsk->transport != transport) continue; fn(sk_vsock(vsk)); } } spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); void vsock_add_pending(struct sock *listener, struct sock *pending) { struct vsock_sock *vlistener; struct vsock_sock *vpending; vlistener = vsock_sk(listener); vpending = vsock_sk(pending); sock_hold(pending); sock_hold(listener); list_add_tail(&vpending->pending_links, &vlistener->pending_links); } EXPORT_SYMBOL_GPL(vsock_add_pending); void vsock_remove_pending(struct sock *listener, struct sock *pending) { struct vsock_sock *vpending = vsock_sk(pending); list_del_init(&vpending->pending_links); sock_put(listener); sock_put(pending); } EXPORT_SYMBOL_GPL(vsock_remove_pending); void vsock_enqueue_accept(struct sock *listener, struct sock *connected) { struct vsock_sock *vlistener; struct vsock_sock *vconnected; vlistener = vsock_sk(listener); vconnected = vsock_sk(connected); sock_hold(connected); sock_hold(listener); list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); } EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { if (!transport_local) return false; if (remote_cid == VMADDR_CID_LOCAL) return true; if (transport_g2h) { return remote_cid == transport_g2h->get_local_cid(); } else { return remote_cid == VMADDR_CID_HOST; } } static void vsock_deassign_transport(struct vsock_sock *vsk) { if (!vsk->transport) return; vsk->transport->destruct(vsk); module_put(vsk->transport->module); vsk->transport = NULL; } /* Assign a transport to a socket and call the .init transport callback. * * Note: for connection oriented socket this must be called when vsk->remote_addr * is set (e.g. during the connect() or when a connection request on a listener * socket is received). * The vsk->remote_addr is used to decide which transport to use: * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if * g2h is not loaded, will use local transport; * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport; * - remote CID > VMADDR_CID_HOST will use host->guest transport; */ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) { const struct vsock_transport *new_transport; struct sock *sk = sk_vsock(vsk); unsigned int remote_cid = vsk->remote_addr.svm_cid; __u8 remote_flags; int ret; /* If the packet is coming with the source and destination CIDs higher * than VMADDR_CID_HOST, then a vsock channel where all the packets are * forwarded to the host should be established. Then the host will * need to forward the packets to the guest. 
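 *
 * Illustrative example (hypothetical CIDs): a listener in guest A with
 * local CID 5 that receives a request from guest B with CID 6 sees both
 * CIDs above VMADDR_CID_HOST (2), so VMADDR_FLAG_TO_HOST is set on the
 * remote address and the guest->host transport is selected below.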
* * The flag is set on the (listen) receive path (psk is not NULL). On * the connect path the flag can be set by the user space application. */ if (psk && vsk->local_addr.svm_cid > VMADDR_CID_HOST && vsk->remote_addr.svm_cid > VMADDR_CID_HOST) vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST; remote_flags = vsk->remote_addr.svm_flags; switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; break; case SOCK_STREAM: case SOCK_SEQPACKET: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g || (remote_flags & VMADDR_FLAG_TO_HOST)) new_transport = transport_g2h; else new_transport = transport_h2g; break; default: return -ESOCKTNOSUPPORT; } if (vsk->transport) { if (vsk->transport == new_transport) return 0; /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we * have already held the sock lock. In the other cases, this * function is called on a new socket which is not assigned to * any transport. */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); } /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ if (!new_transport || !try_module_get(new_transport->module)) return -ENODEV; if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || !new_transport->seqpacket_allow(remote_cid)) { module_put(new_transport->module); return -ESOCKTNOSUPPORT; } } ret = new_transport->init(vsk, psk); if (ret) { module_put(new_transport->module); return ret; } vsk->transport = new_transport; return 0; } EXPORT_SYMBOL_GPL(vsock_assign_transport); bool vsock_find_cid(unsigned int cid) { if (transport_g2h && cid == transport_g2h->get_local_cid()) return true; if (transport_h2g && cid == VMADDR_CID_HOST) return true; if (transport_local && cid == VMADDR_CID_LOCAL) return true; return false; } EXPORT_SYMBOL_GPL(vsock_find_cid); static struct sock *vsock_dequeue_accept(struct sock *listener) { struct vsock_sock *vlistener; struct vsock_sock *vconnected; vlistener = vsock_sk(listener); if (list_empty(&vlistener->accept_queue)) return NULL; vconnected = list_entry(vlistener->accept_queue.next, struct vsock_sock, accept_queue); list_del_init(&vconnected->accept_queue); sock_put(listener); /* The caller will need a reference on the connected socket so we let * it call sock_put(). */ return sk_vsock(vconnected); } static bool vsock_is_accept_queue_empty(struct sock *sk) { struct vsock_sock *vsk = vsock_sk(sk); return list_empty(&vsk->accept_queue); } static bool vsock_is_pending(struct sock *sk) { struct vsock_sock *vsk = vsock_sk(sk); return !list_empty(&vsk->pending_links); } static int vsock_send_shutdown(struct sock *sk, int mode) { struct vsock_sock *vsk = vsock_sk(sk); if (!vsk->transport) return -ENODEV; return vsk->transport->shutdown(vsk, mode); } static void vsock_pending_work(struct work_struct *work) { struct sock *sk; struct sock *listener; struct vsock_sock *vsk; bool cleanup; vsk = container_of(work, struct vsock_sock, pending_work.work); sk = sk_vsock(vsk); listener = vsk->listener; cleanup = true; lock_sock(listener); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); sk_acceptq_removed(listener); } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. 
We * just need to drop our references to the sockets and be on * our way. */ cleanup = false; goto out; } /* We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. */ vsock_remove_connected(vsk); sk->sk_state = TCP_CLOSE; out: release_sock(sk); release_sock(listener); if (cleanup) sock_put(sk); sock_put(sk); sock_put(listener); } /**** SOCKET OPERATIONS ****/ static int __vsock_bind_connectible(struct vsock_sock *vsk, struct sockaddr_vm *addr) { static u32 port; struct sockaddr_vm new_addr; if (!port) port = get_random_u32_above(LAST_RESERVED_PORT); vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); if (addr->svm_port == VMADDR_PORT_ANY) { bool found = false; unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { if (port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; if (!__vsock_find_bound_socket(&new_addr)) { found = true; break; } } if (!found) return -EADDRNOTAVAIL; } else { /* If port is in reserved range, ensure caller * has necessary privileges. */ if (addr->svm_port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) { return -EACCES; } if (__vsock_find_bound_socket(&new_addr)) return -EADDRINUSE; } vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); /* Remove connection oriented sockets from the unbound list and add them * to the hash table for easy lookup by its address. The unbound list * is simply an extra entry at the end of the hash table, a trick used * by AF_UNIX. */ __vsock_remove_bound(vsk); __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); return 0; } static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { return vsk->transport->dgram_bind(vsk, addr); } static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) { struct vsock_sock *vsk = vsock_sk(sk); int retval; /* First ensure this socket isn't already bound. */ if (vsock_addr_bound(&vsk->local_addr)) return -EINVAL; /* Now bind to the provided address or select appropriate values if * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that * like AF_INET prevents binding to a non-local IP address (in most * cases), we only allow binding to a local CID. */ if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid)) return -EADDRNOTAVAIL; switch (sk->sk_socket->type) { case SOCK_STREAM: case SOCK_SEQPACKET: spin_lock_bh(&vsock_table_lock); retval = __vsock_bind_connectible(vsk, addr); spin_unlock_bh(&vsock_table_lock); break; case SOCK_DGRAM: retval = __vsock_bind_dgram(vsk, addr); break; default: retval = -EINVAL; break; } return retval; } static void vsock_connect_timeout(struct work_struct *work); static struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, gfp_t priority, unsigned short type, int kern) { struct sock *sk; struct vsock_sock *psk; struct vsock_sock *vsk; sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern); if (!sk) return NULL; sock_init_data(sock, sk); /* sk->sk_type is normally set in sock_init_data, but only if sock is * non-NULL. We make sure that our sockets always have a type by * setting it here if needed. 
*/ if (!sock) sk->sk_type = type; vsk = vsock_sk(sk); vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); sk->sk_destruct = vsock_sk_destruct; sk->sk_backlog_rcv = vsock_queue_rcv_skb; sock_reset_flag(sk, SOCK_DONE); INIT_LIST_HEAD(&vsk->bound_table); INIT_LIST_HEAD(&vsk->connected_table); vsk->listener = NULL; INIT_LIST_HEAD(&vsk->pending_links); INIT_LIST_HEAD(&vsk->accept_queue); vsk->rejected = false; vsk->sent_request = false; vsk->ignore_connecting_rst = false; vsk->peer_shutdown = 0; INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); psk = parent ? vsock_sk(parent) : NULL; if (parent) { vsk->trusted = psk->trusted; vsk->owner = get_cred(psk->owner); vsk->connect_timeout = psk->connect_timeout; vsk->buffer_size = psk->buffer_size; vsk->buffer_min_size = psk->buffer_min_size; vsk->buffer_max_size = psk->buffer_max_size; security_sk_clone(parent, sk); } else { vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN); vsk->owner = get_current_cred(); vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE; vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE; vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE; } return sk; } static bool sock_type_connectible(u16 type) { return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET); } static void __vsock_release(struct sock *sk, int level) { if (sk) { struct sock *pending; struct vsock_sock *vsk; vsk = vsock_sk(sk); pending = NULL; /* Compiler warning. */ /* When "level" is SINGLE_DEPTH_NESTING, use the nested * version to avoid the warning "possible recursive locking * detected". When "level" is 0, lock_sock_nested(sk, level) * is the same as lock_sock(sk). */ lock_sock_nested(sk, level); if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); /* Clean up any sockets that never were accepted. */ while ((pending = vsock_dequeue_accept(sk)) != NULL) { __vsock_release(pending, SINGLE_DEPTH_NESTING); sock_put(pending); } release_sock(sk); sock_put(sk); } } static void vsock_sk_destruct(struct sock *sk) { struct vsock_sock *vsk = vsock_sk(sk); vsock_deassign_transport(vsk); /* When clearing these addresses, there's no need to set the family and * possibly register the address family with the kernel. 
*/ vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); put_cred(vsk->owner); } static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; err = sock_queue_rcv_skb(sk, skb); if (err) kfree_skb(skb); return err; } struct sock *vsock_create_connected(struct sock *parent) { return __vsock_create(sock_net(parent), NULL, parent, GFP_KERNEL, parent->sk_type, 0); } EXPORT_SYMBOL_GPL(vsock_create_connected); s64 vsock_stream_has_data(struct vsock_sock *vsk) { return vsk->transport->stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_data); s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); if (sk->sk_type == SOCK_SEQPACKET) return vsk->transport->seqpacket_has_data(vsk); else return vsock_stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); void vsock_data_ready(struct sock *sk) { struct vsock_sock *vsk = vsock_sk(sk); if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat || sock_flag(sk, SOCK_DONE)) sk->sk_data_ready(sk); } EXPORT_SYMBOL_GPL(vsock_data_ready); static int vsock_release(struct socket *sock) { __vsock_release(sock->sk, 0); sock->sk = NULL; sock->state = SS_FREE; return 0; } static int vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { int err; struct sock *sk; struct sockaddr_vm *vm_addr; sk = sock->sk; if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) return -EINVAL; lock_sock(sk); err = __vsock_bind(sk, vm_addr); release_sock(sk); return err; } static int vsock_getname(struct socket *sock, struct sockaddr *addr, int peer) { int err; struct sock *sk; struct vsock_sock *vsk; struct sockaddr_vm *vm_addr; sk = sock->sk; vsk = vsock_sk(sk); err = 0; lock_sock(sk); if (peer) { if (sock->state != SS_CONNECTED) { err = -ENOTCONN; goto out; } vm_addr = &vsk->remote_addr; } else { vm_addr = &vsk->local_addr; } if (!vm_addr) { err = -EINVAL; goto out; } /* sys_getsockname() and sys_getpeername() pass us a * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately * that macro is defined in socket.c instead of .h, so we hardcode its * value here. */ BUILD_BUG_ON(sizeof(*vm_addr) > 128); memcpy(addr, vm_addr, sizeof(*vm_addr)); err = sizeof(*vm_addr); out: release_sock(sk); return err; } static int vsock_shutdown(struct socket *sock, int mode) { int err; struct sock *sk; /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode * here like the other address families do. Note also that the * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), * which is what we want. */ mode++; if ((mode & ~SHUTDOWN_MASK) || !mode) return -EINVAL; /* If this is a connection oriented socket and it is not connected then * bail out immediately. If it is a DGRAM socket then we must first * kick the socket so that it wakes up from any sleeping calls, for * example recv(), and then afterwards return the error. */ sk = sock->sk; lock_sock(sk); if (sock->state == SS_UNCONNECTED) { err = -ENOTCONN; if (sock_type_connectible(sk->sk_type)) goto out; } else { sock->state = SS_DISCONNECTING; err = 0; } /* Receive and send shutdowns are treated alike. 
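 *
 * After the mode++ above, the user space values map as follows:
 *
 *   SHUT_RD   (0) -> 1 = RCV_SHUTDOWN
 *   SHUT_WR   (1) -> 2 = SEND_SHUTDOWN
 *   SHUT_RDWR (2) -> 3 = RCV_SHUTDOWN | SEND_SHUTDOWN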
*/ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); if (mode) { sk->sk_shutdown |= mode; sk->sk_state_change(sk); if (sock_type_connectible(sk->sk_type)) { sock_reset_flag(sk, SOCK_DONE); vsock_send_shutdown(sk, mode); } } out: release_sock(sk); return err; } static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk; __poll_t mask; struct vsock_sock *vsk; sk = sock->sk; vsk = vsock_sk(sk); poll_wait(file, sk_sleep(sk), wait); mask = 0; if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; /* INET sockets treat local write shutdown and peer write shutdown as a * case of EPOLLHUP set. */ if ((sk->sk_shutdown == SHUTDOWN_MASK) || ((sk->sk_shutdown & SEND_SHUTDOWN) && (vsk->peer_shutdown & SEND_SHUTDOWN))) { mask |= EPOLLHUP; } if (sk->sk_shutdown & RCV_SHUTDOWN || vsk->peer_shutdown & SEND_SHUTDOWN) { mask |= EPOLLRDHUP; } if (sock->type == SOCK_DGRAM) { /* For datagram sockets we can read if there is something in * the queue and write as long as the socket isn't shutdown for * sending. */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) { mask |= EPOLLIN | EPOLLRDNORM; } if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; } else if (sock_type_connectible(sk->sk_type)) { const struct vsock_transport *transport; lock_sock(sk); transport = vsk->transport; /* Listening sockets that have connections in their accept * queue can be read. */ if (sk->sk_state == TCP_LISTEN && !vsock_is_accept_queue_empty(sk)) mask |= EPOLLIN | EPOLLRDNORM; /* If there is something in the queue then we can read. */ if (transport && transport->stream_is_active(vsk) && !(sk->sk_shutdown & RCV_SHUTDOWN)) { bool data_ready_now = false; int target = sock_rcvlowat(sk, 0, INT_MAX); int ret = transport->notify_poll_in( vsk, target, &data_ready_now); if (ret < 0) { mask |= EPOLLERR; } else { if (data_ready_now) mask |= EPOLLIN | EPOLLRDNORM; } } /* Sockets whose connections have been closed, reset, or * terminated should also be considered read, and we check the * shutdown flag for that. */ if (sk->sk_shutdown & RCV_SHUTDOWN || vsk->peer_shutdown & SEND_SHUTDOWN) { mask |= EPOLLIN | EPOLLRDNORM; } /* Connected sockets that can produce data can be written. */ if (transport && sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( vsk, 1, &space_avail_now); if (ret < 0) { mask |= EPOLLERR; } else { if (space_avail_now) /* Remove EPOLLWRBAND since INET * sockets are not setting it. */ mask |= EPOLLOUT | EPOLLWRNORM; } } } /* Simulate INET socket poll behaviors, which sets * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to read, * but local send is not shutdown. */ if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= EPOLLOUT | EPOLLWRNORM; } release_sock(sk); } return mask; } static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) { struct vsock_sock *vsk = vsock_sk(sk); return vsk->transport->read_skb(vsk, read_actor); } static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { int err; struct sock *sk; struct vsock_sock *vsk; struct sockaddr_vm *remote_addr; const struct vsock_transport *transport; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; /* For now, MSG_DONTWAIT is always assumed... 
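 *
 * For reference, a minimal user space sender that ends up in this path
 * looks roughly like this (hypothetical CID/port, no error handling):
 *
 *   struct sockaddr_vm addr = {
 *           .svm_family = AF_VSOCK,
 *           .svm_cid    = VMADDR_CID_HOST,
 *           .svm_port   = 1234,
 *   };
 *   int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);
 *
 *   sendto(fd, buf, len, 0, (struct sockaddr *)&addr, sizeof(addr));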
*/ err = 0; sk = sock->sk; vsk = vsock_sk(sk); lock_sock(sk); transport = vsk->transport; err = vsock_auto_bind(vsk); if (err) goto out; /* If the provided message contains an address, use that. Otherwise * fall back on the socket's remote handle (if it has been connected). */ if (msg->msg_name && vsock_addr_cast(msg->msg_name, msg->msg_namelen, &remote_addr) == 0) { /* Ensure this address is of the right type and is a valid * destination. */ if (remote_addr->svm_cid == VMADDR_CID_ANY) remote_addr->svm_cid = transport->get_local_cid(); if (!vsock_addr_bound(remote_addr)) { err = -EINVAL; goto out; } } else if (sock->state == SS_CONNECTED) { remote_addr = &vsk->remote_addr; if (remote_addr->svm_cid == VMADDR_CID_ANY) remote_addr->svm_cid = transport->get_local_cid(); /* XXX Should connect() or this function ensure remote_addr is * bound? */ if (!vsock_addr_bound(&vsk->remote_addr)) { err = -EINVAL; goto out; } } else { err = -EINVAL; goto out; } if (!transport->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { err = -EINVAL; goto out; } err = transport->dgram_enqueue(vsk, remote_addr, msg, len); out: release_sock(sk); return err; } static int vsock_dgram_connect(struct socket *sock, struct sockaddr *addr, int addr_len, int flags) { int err; struct sock *sk; struct vsock_sock *vsk; struct sockaddr_vm *remote_addr; sk = sock->sk; vsk = vsock_sk(sk); err = vsock_addr_cast(addr, addr_len, &remote_addr); if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { lock_sock(sk); vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); sock->state = SS_UNCONNECTED; release_sock(sk); return 0; } else if (err != 0) return -EINVAL; lock_sock(sk); err = vsock_auto_bind(vsk); if (err) goto out; if (!vsk->transport->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { err = -EINVAL; goto out; } memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); sock->state = SS_CONNECTED; /* sock map disallows redirection of non-TCP sockets with sk_state != * TCP_ESTABLISHED (see sock_map_redirect_allowed()), so we set * TCP_ESTABLISHED here to allow redirection of connected vsock dgrams. * * This doesn't seem to be abnormal state for datagram sockets, as the * same approach can be see in other datagram socket types as well * (such as unix sockets). 
*/ sk->sk_state = TCP_ESTABLISHED; out: release_sock(sk); return err; } int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct vsock_sock *vsk = vsock_sk(sk); return vsk->transport->dgram_dequeue(vsk, msg, len, flags); } int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { #ifdef CONFIG_BPF_SYSCALL struct sock *sk = sock->sk; const struct proto *prot; prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) return prot->recvmsg(sk, msg, len, flags, NULL); #endif return __vsock_dgram_recvmsg(sock, msg, len, flags); } EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); static int vsock_do_ioctl(struct socket *sock, unsigned int cmd, int __user *arg) { struct sock *sk = sock->sk; struct vsock_sock *vsk; int ret; vsk = vsock_sk(sk); switch (cmd) { case SIOCOUTQ: { ssize_t n_bytes; if (!vsk->transport || !vsk->transport->unsent_bytes) { ret = -EOPNOTSUPP; break; } if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) { ret = -EINVAL; break; } n_bytes = vsk->transport->unsent_bytes(vsk); if (n_bytes < 0) { ret = n_bytes; break; } ret = put_user(n_bytes, arg); break; } default: ret = -ENOIOCTLCMD; } return ret; } static int vsock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { int ret; lock_sock(sock->sk); ret = vsock_do_ioctl(sock, cmd, (int __user *)arg); release_sock(sock->sk); return ret; } static const struct proto_ops vsock_dgram_ops = { .family = PF_VSOCK, .owner = THIS_MODULE, .release = vsock_release, .bind = vsock_bind, .connect = vsock_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = vsock_getname, .poll = vsock_poll, .ioctl = vsock_ioctl, .listen = sock_no_listen, .shutdown = vsock_shutdown, .sendmsg = vsock_dgram_sendmsg, .recvmsg = vsock_dgram_recvmsg, .mmap = sock_no_mmap, .read_skb = vsock_read_skb, }; static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) { const struct vsock_transport *transport = vsk->transport; if (!transport || !transport->cancel_pkt) return -EOPNOTSUPP; return transport->cancel_pkt(vsk); } static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); lock_sock(sk); if (sk->sk_state == TCP_SYN_SENT && (sk->sk_shutdown != SHUTDOWN_MASK)) { sk->sk_state = TCP_CLOSE; sk->sk_socket->state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk_error_report(sk); vsock_transport_cancel_pkt(vsk); } release_sock(sk); sock_put(sk); } static int vsock_connect(struct socket *sock, struct sockaddr *addr, int addr_len, int flags) { int err; struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; struct sockaddr_vm *remote_addr; long timeout; DEFINE_WAIT(wait); err = 0; sk = sock->sk; vsk = vsock_sk(sk); lock_sock(sk); /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ switch (sock->state) { case SS_CONNECTED: err = -EISCONN; goto out; case SS_DISCONNECTING: err = -EINVAL; goto out; case SS_CONNECTING: /* This continues on so we can move sock into the SS_CONNECTED * state once the connection has completed (at which point err * will be set to zero also). Otherwise, we will either wait * for the connection or return -EALREADY should this be a * non-blocking call. 
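 *
 * From user space the non-blocking sequence looks roughly like this
 * (illustrative, hypothetical destination):
 *
 *   struct sockaddr_vm addr = {
 *           .svm_family = AF_VSOCK,
 *           .svm_cid    = VMADDR_CID_HOST,
 *           .svm_port   = 1234,
 *   };
 *   int fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0);
 *
 *   connect(fd, (struct sockaddr *)&addr, sizeof(addr));  fails, EINPROGRESS
 *   connect(fd, (struct sockaddr *)&addr, sizeof(addr));  fails, EALREADY
 *   ... then poll for EPOLLOUT to learn when the attempt has finished ...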
*/ err = -EALREADY; if (flags & O_NONBLOCK) goto out; break; default: if ((sk->sk_state == TCP_LISTEN) || vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { err = -EINVAL; goto out; } /* Set the remote address that we are connecting to. */ memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); err = vsock_assign_transport(vsk, NULL); if (err) goto out; transport = vsk->transport; /* The hypervisor and well-known contexts do not have socket * endpoints. */ if (!transport || !transport->stream_allow(remote_addr->svm_cid, remote_addr->svm_port)) { err = -ENETUNREACH; goto out; } if (vsock_msgzerocopy_allow(transport)) { set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); } else if (sock_flag(sk, SOCK_ZEROCOPY)) { /* If this option was set before 'connect()', * when transport was unknown, check that this * feature is supported here. */ err = -EOPNOTSUPP; goto out; } err = vsock_auto_bind(vsk); if (err) goto out; sk->sk_state = TCP_SYN_SENT; err = transport->connect(vsk); if (err < 0) goto out; /* Mark sock as connecting and set the error code to in * progress in case this is a non-blocking connect. */ sock->state = SS_CONNECTING; err = -EINPROGRESS; } /* The receive path will handle all communication until we are able to * enter the connected state. Here we wait for the connection to be * completed or a notification of an error. */ timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection * attempt, in case the peer doesn't respond in a * timely manner. We hold on to the socket until the * timeout fires. */ sock_hold(sk); /* If the timeout function is already scheduled, * reschedule it, then ungrab the socket refcount to * keep it balanced. */ if (mod_delayed_work(system_wq, &vsk->connect_work, timeout)) sock_put(sk); /* Skip ahead to preserve error code set above. */ goto out_wait; } release_sock(sk); timeout = schedule_timeout(timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); vsock_remove_connected(vsk); goto out_wait; } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) { err = -ETIMEDOUT; sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } if (sk->sk_err) { err = -sk->sk_err; sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; } else { err = 0; } out_wait: finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; } static int vsock_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { struct sock *listener; int err; struct sock *connected; struct vsock_sock *vconnected; long timeout; DEFINE_WAIT(wait); err = 0; listener = sock->sk; lock_sock(listener); if (!sock_type_connectible(sock->type)) { err = -EOPNOTSUPP; goto out; } if (listener->sk_state != TCP_LISTEN) { err = -EINVAL; goto out; } /* Wait for children sockets to appear; these are the new sockets * created upon connection establishment. 
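 *
 * The listener side that feeds this queue is plain BSD socket code, e.g.
 * (illustrative, hypothetical port, no error handling):
 *
 *   struct sockaddr_vm addr = {
 *           .svm_family = AF_VSOCK,
 *           .svm_cid    = VMADDR_CID_ANY,
 *           .svm_port   = 1234,
 *   };
 *   int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
 *
 *   bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *   listen(fd, 10);
 *   int conn = accept(fd, NULL, NULL);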
*/ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK); prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && listener->sk_err == 0) { release_sock(listener); timeout = schedule_timeout(timeout); finish_wait(sk_sleep(listener), &wait); lock_sock(listener); if (signal_pending(current)) { err = sock_intr_errno(timeout); goto out; } else if (timeout == 0) { err = -EAGAIN; goto out; } prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); } finish_wait(sk_sleep(listener), &wait); if (listener->sk_err) err = -listener->sk_err; if (connected) { sk_acceptq_removed(listener); lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); /* If the listener socket has received an error, then we should * reject this socket and return. Note that we simply mark the * socket rejected, drop our reference, and let the cleanup * function handle the cleanup; the fact that we found it in * the listener's accept queue guarantees that the cleanup * function hasn't run yet. */ if (err) { vconnected->rejected = true; } else { newsock->state = SS_CONNECTED; sock_graft(connected, newsock); if (vsock_msgzerocopy_allow(vconnected->transport)) set_bit(SOCK_SUPPORT_ZC, &connected->sk_socket->flags); } release_sock(connected); sock_put(connected); } out: release_sock(listener); return err; } static int vsock_listen(struct socket *sock, int backlog) { int err; struct sock *sk; struct vsock_sock *vsk; sk = sock->sk; lock_sock(sk); if (!sock_type_connectible(sk->sk_type)) { err = -EOPNOTSUPP; goto out; } if (sock->state != SS_UNCONNECTED) { err = -EINVAL; goto out; } vsk = vsock_sk(sk); if (!vsock_addr_bound(&vsk->local_addr)) { err = -EINVAL; goto out; } sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; err = 0; out: release_sock(sk); return err; } static void vsock_update_buffer_size(struct vsock_sock *vsk, const struct vsock_transport *transport, u64 val) { if (val > vsk->buffer_max_size) val = vsk->buffer_max_size; if (val < vsk->buffer_min_size) val = vsk->buffer_min_size; if (val != vsk->buffer_size && transport && transport->notify_buffer_size) transport->notify_buffer_size(vsk, &val); vsk->buffer_size = val; } static int vsock_connectible_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { int err; struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; u64 val; if (level != AF_VSOCK && level != SOL_SOCKET) return -ENOPROTOOPT; #define COPY_IN(_v) \ do { \ if (optlen < sizeof(_v)) { \ err = -EINVAL; \ goto exit; \ } \ if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) { \ err = -EFAULT; \ goto exit; \ } \ } while (0) err = 0; sk = sock->sk; vsk = vsock_sk(sk); lock_sock(sk); transport = vsk->transport; if (level == SOL_SOCKET) { int zerocopy; if (optname != SO_ZEROCOPY) { release_sock(sk); return sock_setsockopt(sock, level, optname, optval, optlen); } /* Use 'int' type here, because variable to * set this option usually has this type. 
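 *
 * i.e. the expected user space call is simply (illustrative):
 *
 *   int one = 1;
 *
 *   setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));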
*/ COPY_IN(zerocopy); if (zerocopy < 0 || zerocopy > 1) { err = -EINVAL; goto exit; } if (transport && !vsock_msgzerocopy_allow(transport)) { err = -EOPNOTSUPP; goto exit; } sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy); goto exit; } switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); vsock_update_buffer_size(vsk, transport, val); break; case SO_VM_SOCKETS_BUFFER_MAX_SIZE: COPY_IN(val); vsk->buffer_max_size = val; vsock_update_buffer_size(vsk, transport, vsk->buffer_size); break; case SO_VM_SOCKETS_BUFFER_MIN_SIZE: COPY_IN(val); vsk->buffer_min_size = val; vsock_update_buffer_size(vsk, transport, vsk->buffer_size); break; case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: { struct __kernel_sock_timeval tv; err = sock_copy_user_timeval(&tv, optval, optlen, optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); if (err) break; if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { vsk->connect_timeout = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ)); if (vsk->connect_timeout == 0) vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; } else { err = -ERANGE; } break; } default: err = -ENOPROTOOPT; break; } #undef COPY_IN exit: release_sock(sk); return err; } static int vsock_connectible_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct vsock_sock *vsk = vsock_sk(sk); union { u64 val64; struct old_timeval32 tm32; struct __kernel_old_timeval tm; struct __kernel_sock_timeval stm; } v; int lv = sizeof(v.val64); int len; if (level != AF_VSOCK) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; memset(&v, 0, sizeof(v)); switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: v.val64 = vsk->buffer_size; break; case SO_VM_SOCKETS_BUFFER_MAX_SIZE: v.val64 = vsk->buffer_max_size; break; case SO_VM_SOCKETS_BUFFER_MIN_SIZE: v.val64 = vsk->buffer_min_size; break; case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: lv = sock_get_timeout(vsk->connect_timeout, &v, optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); break; default: return -ENOPROTOOPT; } if (len < lv) return -EINVAL; if (len > lv) len = lv; if (copy_to_user(optval, &v, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; return 0; } static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; ssize_t total_written; long timeout; int err; struct vsock_transport_send_notify_data send_data; DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); total_written = 0; err = 0; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; lock_sock(sk); transport = vsk->transport; /* Callers should not provide a destination with connection oriented * sockets. */ if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out; } /* Send data only if both sides are not shutdown in the direction. */ if (sk->sk_shutdown & SEND_SHUTDOWN || vsk->peer_shutdown & RCV_SHUTDOWN) { err = -EPIPE; goto out; } if (!transport || sk->sk_state != TCP_ESTABLISHED || !vsock_addr_bound(&vsk->local_addr)) { err = -ENOTCONN; goto out; } if (!vsock_addr_bound(&vsk->remote_addr)) { err = -EDESTADDRREQ; goto out; } if (msg->msg_flags & MSG_ZEROCOPY && !vsock_msgzerocopy_allow(transport)) { err = -EOPNOTSUPP; goto out; } /* Wait for room in the produce queue to enqueue our user's data. 
*/ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = transport->notify_send_init(vsk, &send_data); if (err < 0) goto out; while (total_written < len) { ssize_t written; add_wait_queue(sk_sleep(sk), &wait); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && !(vsk->peer_shutdown & RCV_SHUTDOWN)) { /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); if (err < 0) { remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } release_sock(sk); timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } else if (timeout == 0) { err = -EAGAIN; remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } } remove_wait_queue(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after * sleeping. */ if (sk->sk_err) { err = -sk->sk_err; goto out_err; } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || (vsk->peer_shutdown & RCV_SHUTDOWN)) { err = -EPIPE; goto out_err; } err = transport->notify_send_pre_enqueue(vsk, &send_data); if (err < 0) goto out_err; /* Note that enqueue will only write as many bytes as are free * in the produce queue, so we don't need to ensure len is * smaller than the queue size. It is the caller's * responsibility to check how many bytes we were able to send. */ if (sk->sk_type == SOCK_SEQPACKET) { written = transport->seqpacket_enqueue(vsk, msg, len - total_written); } else { written = transport->stream_enqueue(vsk, msg, len - total_written); } if (written < 0) { err = written; goto out_err; } total_written += written; err = transport->notify_send_post_enqueue( vsk, written, &send_data); if (err < 0) goto out_err; } out_err: if (total_written > 0) { /* Return number of written bytes only if: * 1) SOCK_STREAM socket. * 2) SOCK_SEQPACKET socket when whole buffer is sent. */ if (sk->sk_type == SOCK_STREAM || total_written == len) err = total_written; } out: if (sk->sk_type == SOCK_STREAM) err = sk_stream_error(sk, msg->msg_flags, err); release_sock(sk); return err; } static int vsock_connectible_wait_data(struct sock *sk, struct wait_queue_entry *wait, long timeout, struct vsock_transport_recv_notify_data *recv_data, size_t target) { const struct vsock_transport *transport; struct vsock_sock *vsk; s64 data; int err; vsk = vsock_sk(sk); err = 0; transport = vsk->transport; while (1) { prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); data = vsock_connectible_has_data(vsk); if (data != 0) break; if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) || (vsk->peer_shutdown & SEND_SHUTDOWN)) { break; } /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; break; } if (recv_data) { err = transport->notify_recv_pre_block(vsk, target, recv_data); if (err < 0) break; } release_sock(sk); timeout = schedule_timeout(timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); break; } else if (timeout == 0) { err = -EAGAIN; break; } } finish_wait(sk_sleep(sk), wait); if (err) return err; /* Internal transport error when checking for available * data. XXX This should be changed to a connection * reset in a later change. 
*/ if (data < 0) return -ENOMEM; return data; } static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { struct vsock_transport_recv_notify_data recv_data; const struct vsock_transport *transport; struct vsock_sock *vsk; ssize_t copied; size_t target; long timeout; int err; DEFINE_WAIT(wait); vsk = vsock_sk(sk); transport = vsk->transport; /* We must not copy less than target bytes into the user's buffer * before returning successfully, so we wait for the consume queue to * have that much data to consume before dequeueing. Note that this * makes it impossible to handle cases where target is greater than the * queue size. */ target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); if (target >= transport->stream_rcvhiwat(vsk)) { err = -ENOMEM; goto out; } timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); copied = 0; err = transport->notify_recv_init(vsk, target, &recv_data); if (err < 0) goto out; while (1) { ssize_t read; err = vsock_connectible_wait_data(sk, &wait, timeout, &recv_data, target); if (err <= 0) break; err = transport->notify_recv_pre_dequeue(vsk, target, &recv_data); if (err < 0) break; read = transport->stream_dequeue(vsk, msg, len - copied, flags); if (read < 0) { err = read; break; } copied += read; err = transport->notify_recv_post_dequeue(vsk, target, read, !(flags & MSG_PEEK), &recv_data); if (err < 0) goto out; if (read >= target || flags & MSG_PEEK) break; target -= read; } if (sk->sk_err) err = -sk->sk_err; else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; if (copied > 0) err = copied; out: return err; } static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { const struct vsock_transport *transport; struct vsock_sock *vsk; ssize_t msg_len; long timeout; int err = 0; DEFINE_WAIT(wait); vsk = vsock_sk(sk); transport = vsk->transport; timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0); if (err <= 0) goto out; msg_len = transport->seqpacket_dequeue(vsk, msg, flags); if (msg_len < 0) { err = msg_len; goto out; } if (sk->sk_err) { err = -sk->sk_err; } else if (sk->sk_shutdown & RCV_SHUTDOWN) { err = 0; } else { /* User sets MSG_TRUNC, so return real length of * packet. */ if (flags & MSG_TRUNC) err = msg_len; else err = len - msg_data_left(msg); /* Always set MSG_TRUNC if real length of packet is * bigger than user's buffer. */ if (msg_len > len) msg->msg_flags |= MSG_TRUNC; } out: return err; } int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; int err; sk = sock->sk; if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR); vsk = vsock_sk(sk); err = 0; lock_sock(sk); transport = vsk->transport; if (!transport || sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a * peer has not connected or a local shutdown occurred with the * SOCK_DONE flag. */ if (sock_flag(sk, SOCK_DONE)) err = 0; else err = -ENOTCONN; goto out; } if (flags & MSG_OOB) { err = -EOPNOTSUPP; goto out; } /* We don't check peer_shutdown flag here since peer may actually shut * down, but there can be data in the queue that a local socket can * receive. */ if (sk->sk_shutdown & RCV_SHUTDOWN) { err = 0; goto out; } /* It is valid on Linux to pass in a zero-length receive buffer. 
This * is not an error. We may as well bail out now. */ if (!len) { err = 0; goto out; } if (sk->sk_type == SOCK_STREAM) err = __vsock_stream_recvmsg(sk, msg, len, flags); else err = __vsock_seqpacket_recvmsg(sk, msg, len, flags); out: release_sock(sk); return err; } int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { #ifdef CONFIG_BPF_SYSCALL struct sock *sk = sock->sk; const struct proto *prot; prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) return prot->recvmsg(sk, msg, len, flags, NULL); #endif return __vsock_connectible_recvmsg(sock, msg, len, flags); } EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); static int vsock_set_rcvlowat(struct sock *sk, int val) { const struct vsock_transport *transport; struct vsock_sock *vsk; vsk = vsock_sk(sk); if (val > vsk->buffer_size) return -EINVAL; transport = vsk->transport; if (transport && transport->notify_set_rcvlowat) { int err; err = transport->notify_set_rcvlowat(vsk, val); if (err) return err; } WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); return 0; } static const struct proto_ops vsock_stream_ops = { .family = PF_VSOCK, .owner = THIS_MODULE, .release = vsock_release, .bind = vsock_bind, .connect = vsock_connect, .socketpair = sock_no_socketpair, .accept = vsock_accept, .getname = vsock_getname, .poll = vsock_poll, .ioctl = vsock_ioctl, .listen = vsock_listen, .shutdown = vsock_shutdown, .setsockopt = vsock_connectible_setsockopt, .getsockopt = vsock_connectible_getsockopt, .sendmsg = vsock_connectible_sendmsg, .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, .set_rcvlowat = vsock_set_rcvlowat, .read_skb = vsock_read_skb, }; static const struct proto_ops vsock_seqpacket_ops = { .family = PF_VSOCK, .owner = THIS_MODULE, .release = vsock_release, .bind = vsock_bind, .connect = vsock_connect, .socketpair = sock_no_socketpair, .accept = vsock_accept, .getname = vsock_getname, .poll = vsock_poll, .ioctl = vsock_ioctl, .listen = vsock_listen, .shutdown = vsock_shutdown, .setsockopt = vsock_connectible_setsockopt, .getsockopt = vsock_connectible_getsockopt, .sendmsg = vsock_connectible_sendmsg, .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, .read_skb = vsock_read_skb, }; static int vsock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct vsock_sock *vsk; struct sock *sk; int ret; if (!sock) return -EINVAL; if (protocol && protocol != PF_VSOCK) return -EPROTONOSUPPORT; switch (sock->type) { case SOCK_DGRAM: sock->ops = &vsock_dgram_ops; break; case SOCK_STREAM: sock->ops = &vsock_stream_ops; break; case SOCK_SEQPACKET: sock->ops = &vsock_seqpacket_ops; break; default: return -ESOCKTNOSUPPORT; } sock->state = SS_UNCONNECTED; sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern); if (!sk) return -ENOMEM; vsk = vsock_sk(sk); if (sock->type == SOCK_DGRAM) { ret = vsock_assign_transport(vsk, NULL); if (ret < 0) { sock_put(sk); return ret; } } /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its * proto_ops, so there is no handler for custom logic. 
*/ if (sock_type_connectible(sock->type)) set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); vsock_insert_unbound(vsk); return 0; } static const struct net_proto_family vsock_family_ops = { .family = AF_VSOCK, .create = vsock_create, .owner = THIS_MODULE, }; static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; u32 cid = VMADDR_CID_ANY; int retval = 0; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ if (transport_g2h) cid = transport_g2h->get_local_cid(); else if (transport_h2g) cid = transport_h2g->get_local_cid(); if (put_user(cid, p) != 0) retval = -EFAULT; break; default: retval = -ENOIOCTLCMD; } return retval; } static long vsock_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); } #ifdef CONFIG_COMPAT static long vsock_dev_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); } #endif static const struct file_operations vsock_device_ops = { .owner = THIS_MODULE, .unlocked_ioctl = vsock_dev_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vsock_dev_compat_ioctl, #endif .open = nonseekable_open, }; static struct miscdevice vsock_device = { .name = "vsock", .fops = &vsock_device_ops, }; static int __init vsock_init(void) { int err = 0; vsock_init_tables(); vsock_proto.owner = THIS_MODULE; vsock_device.minor = MISC_DYNAMIC_MINOR; err = misc_register(&vsock_device); if (err) { pr_err("Failed to register misc device\n"); goto err_reset_transport; } err = proto_register(&vsock_proto, 1); /* we want our slab */ if (err) { pr_err("Cannot register vsock protocol\n"); goto err_deregister_misc; } err = sock_register(&vsock_family_ops); if (err) { pr_err("could not register af_vsock (%d) address family: %d\n", AF_VSOCK, err); goto err_unregister_proto; } vsock_bpf_build_proto(); return 0; err_unregister_proto: proto_unregister(&vsock_proto); err_deregister_misc: misc_deregister(&vsock_device); err_reset_transport: return err; } static void __exit vsock_exit(void) { misc_deregister(&vsock_device); sock_unregister(AF_VSOCK); proto_unregister(&vsock_proto); } const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk) { return vsk->transport; } EXPORT_SYMBOL_GPL(vsock_core_get_transport); int vsock_core_register(const struct vsock_transport *t, int features) { const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local; int err = mutex_lock_interruptible(&vsock_register_mutex); if (err) return err; t_h2g = transport_h2g; t_g2h = transport_g2h; t_dgram = transport_dgram; t_local = transport_local; if (features & VSOCK_TRANSPORT_F_H2G) { if (t_h2g) { err = -EBUSY; goto err_busy; } t_h2g = t; } if (features & VSOCK_TRANSPORT_F_G2H) { if (t_g2h) { err = -EBUSY; goto err_busy; } t_g2h = t; } if (features & VSOCK_TRANSPORT_F_DGRAM) { if (t_dgram) { err = -EBUSY; goto err_busy; } t_dgram = t; } if (features & VSOCK_TRANSPORT_F_LOCAL) { if (t_local) { err = -EBUSY; goto err_busy; } t_local = t; } transport_h2g = t_h2g; transport_g2h = t_g2h; transport_dgram = t_dgram; transport_local = t_local; err_busy: mutex_unlock(&vsock_register_mutex); return err; } EXPORT_SYMBOL_GPL(vsock_core_register); void vsock_core_unregister(const struct vsock_transport *t) { mutex_lock(&vsock_register_mutex); if (transport_h2g == t) transport_h2g = NULL; if (transport_g2h == t) 
		transport_g2h = NULL;

	if (transport_dgram == t)
		transport_dgram = NULL;

	if (transport_local == t)
		transport_local = NULL;

	mutex_unlock(&vsock_register_mutex);
}
EXPORT_SYMBOL_GPL(vsock_core_unregister);

module_init(vsock_init);
module_exit(vsock_exit);

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.2.0-k");
MODULE_LICENSE("GPL v2");
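/*
 * Editor's note: the AF_VSOCK family registered above is consumed from user
 * space through ordinary sockets plus the /dev/vsock misc device. The sketch
 * below is illustrative only and not part of the kernel sources; it assumes a
 * peer is already listening on the hypothetical host port 1234. Only the
 * documented interfaces from <linux/vm_sockets.h> are used.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr;
	unsigned int cid;
	int dev, s;

	/* IOCTL_VM_SOCKETS_GET_LOCAL_CID is handled by vsock_dev_do_ioctl() */
	dev = open("/dev/vsock", O_RDONLY);
	if (dev < 0 || ioctl(dev, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid) < 0)
		return 1;
	printf("local CID: %u\n", cid);
	close(dev);

	/* SOCK_STREAM selects vsock_stream_ops in vsock_create() */
	s = socket(AF_VSOCK, SOCK_STREAM, 0);
	if (s < 0)
		return 1;

	memset(&addr, 0, sizeof(addr));
	addr.svm_family = AF_VSOCK;
	addr.svm_cid = VMADDR_CID_HOST;	/* talk to the host side */
	addr.svm_port = 1234;		/* hypothetical service port */

	if (connect(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	close(s);
	return 0;
}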
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HIGHMEM_H
#define _LINUX_HIGHMEM_H

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/cacheflush.h>
#include <linux/kmsan.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>

#include "highmem-internal.h"

/**
 * kmap - Map a page for long term usage
 * @page: Pointer to the page to be mapped
 *
 * Returns: The virtual address of the mapping
 *
 * Can only be invoked from preemptible task context because on 32bit
 * systems with CONFIG_HIGHMEM enabled this function might sleep.
 *
 * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area
 * this returns the virtual address of the direct kernel mapping.
 *
 * The returned virtual address is globally visible and valid up to the
 * point where it is unmapped via kunmap(). The pointer can be handed to
 * other contexts.
* * For highmem pages on 32bit systems this can be slow as the mapping space * is limited and protected by a global lock. In case that there is no * mapping slot available the function blocks until a slot is released via * kunmap(). */ static inline void *kmap(struct page *page); /** * kunmap - Unmap the virtual address mapped by kmap() * @page: Pointer to the page which was mapped by kmap() * * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ static inline void kunmap(struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address * @addr: The address to look up * * Returns: The page which is mapped to @addr. */ static inline struct page *kmap_to_page(void *addr); /** * kmap_flush_unused - Flush all unused kmap mappings in order to * remove stray mappings */ static inline void kmap_flush_unused(void); /** * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can be invoked from any context, including interrupts. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * * addr1 = kmap_local_page(page1); * addr2 = kmap_local_page(page2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While kmap_local_page() is significantly faster than kmap() for the highmem * case it comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. */ static inline void *kmap_local_page(struct page *page); /** * kmap_local_folio - Map a page in this folio for temporary usage * @folio: The folio containing the page. * @offset: The byte offset within the folio which identifies the page. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation:: * * addr1 = kmap_local_folio(folio1, offset1); * addr2 = kmap_local_folio(folio2, offset2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While it is significantly faster than kmap() for the highmem case it * comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_folio() can rely on this side effect. * * Context: Can be invoked from any context. * Return: The virtual address of @offset. 
*/ static inline void *kmap_local_folio(struct folio *folio, size_t offset); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * In fact a wrapper around kmap_local_page() which also disables pagefaults * and, depending on PREEMPT_RT configuration, also CPU migration and * preemption. Therefore users should not count on the latter two side effects. * * Mappings should always be released by kunmap_atomic(). * * Do not use in new code. Use kmap_local_page() instead. * * It is used in atomic context when code wants to access the contents of a * page that might be allocated from high memory (see __GFP_HIGHMEM), for * example a page in the pagecache. The API has two functions, and they * can be used in a manner similar to the following:: * * // Find the page of interest. * struct page *page = find_get_page(mapping, offset); * * // Gain access to the contents of that page. * void *vaddr = kmap_atomic(page); * * // Do something to the contents of that page. * memset(vaddr, 0, PAGE_SIZE); * * // Unmap that page. * kunmap_atomic(vaddr); * * Note that the kunmap_atomic() call takes the result of the kmap_atomic() * call, not the argument. * * If you need to map two pages because you want to copy from one page to * another you need to keep the kmap_atomic calls strictly nested, like: * * vaddr1 = kmap_atomic(page1); * vaddr2 = kmap_atomic(page2); * * memcpy(vaddr1, vaddr2, PAGE_SIZE); * * kunmap_atomic(vaddr2); * kunmap_atomic(vaddr1); */ static inline void *kmap_atomic(struct page *page); /* Highmem related interfaces for management code */ static inline unsigned long nr_free_highpages(void); static inline unsigned long totalhigh_pages(void); #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } #endif #ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { } #endif /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_local_page(page); clear_user_page(addr, vaddr, page); kunmap_local(addr); } #endif #ifndef vma_alloc_zeroed_movable_folio /** * vma_alloc_zeroed_movable_folio - Allocate a zeroed page for a VMA. * @vma: The VMA the page is to be allocated for. * @vaddr: The virtual address the page will be inserted into. * * This function will allocate a page suitable for inserting into this * VMA at this virtual address. It may be allocated from highmem or * the movable zone. An architecture may provide its own implementation. * * Return: A folio containing one allocated and zeroed page or NULL if * we are out of memory. 
*/ static inline struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { struct folio *folio; folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false); if (folio) clear_user_highpage(&folio->page, vaddr); return folio; } #endif static inline void clear_highpage(struct page *page) { void *kaddr = kmap_local_page(page); clear_page(kaddr); kunmap_local(kaddr); } static inline void clear_highpage_kasan_tagged(struct page *page) { void *kaddr = kmap_local_page(page); clear_page(kasan_reset_tag(kaddr)); kunmap_local(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE static inline void tag_clear_highpage(struct page *page) { } #endif /* * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. */ #ifdef CONFIG_HIGHMEM void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); #else static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { void *kaddr = kmap_local_page(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); kunmap_local(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } #endif static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) { zero_user_segments(page, start, end, 0, 0); } static inline void zero_user(struct page *page, unsigned start, unsigned size) { zero_user_segments(page, start, start + size, 0, 0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); } #endif #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_page(vto, vfrom); kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); } #endif #ifdef copy_mc_to_kernel /* * If architecture supports machine check exception handling, define the * #MC versions of copy_user_highpage and copy_highpage. They copy a memory * page with #MC in source page (@from) handled, and return the number * of bytes not copied if there was a #MC, otherwise 0 for success. 
*/ static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); if (!ret) kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); if (ret) memory_failure_queue(page_to_pfn(from), 0); return ret; } static inline int copy_mc_highpage(struct page *to, struct page *from) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); if (!ret) kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); if (ret) memory_failure_queue(page_to_pfn(from), 0); return ret; } #else static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { copy_user_highpage(to, from, vaddr, vma); return 0; } static inline int copy_mc_highpage(struct page *to, struct page *from) { copy_highpage(to, from); return 0; } #endif static inline void memcpy_page(struct page *dst_page, size_t dst_off, struct page *src_page, size_t src_off, size_t len) { char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memcpy(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); } static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, val, len); kunmap_local(addr); } static inline void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) { char *from = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to, from + offset, len); kunmap_local(from); } static inline void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) { char *to = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); flush_dcache_page(page); kunmap_local(to); } static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, 0, len); flush_dcache_page(page); kunmap_local(addr); } /** * memcpy_from_folio - Copy a range of bytes from a folio. * @to: The memory to copy to. * @folio: The folio to read from. * @offset: The first byte in the folio to read. * @len: The number of bytes to copy. */ static inline void memcpy_from_folio(char *to, struct folio *folio, size_t offset, size_t len) { VM_BUG_ON(offset + len > folio_size(folio)); do { const char *from = kmap_local_folio(folio, offset); size_t chunk = len; if (folio_test_highmem(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); kunmap_local(from); to += chunk; offset += chunk; len -= chunk; } while (len > 0); } /** * memcpy_to_folio - Copy a range of bytes to a folio. * @folio: The folio to write to. * @offset: The first byte in the folio to store to. * @from: The memory to copy from. * @len: The number of bytes to copy. 
*/ static inline void memcpy_to_folio(struct folio *folio, size_t offset, const char *from, size_t len) { VM_BUG_ON(offset + len > folio_size(folio)); do { char *to = kmap_local_folio(folio, offset); size_t chunk = len; if (folio_test_highmem(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); kunmap_local(to); from += chunk; offset += chunk; len -= chunk; } while (len > 0); flush_dcache_folio(folio); } /** * folio_zero_tail - Zero the tail of a folio. * @folio: The folio to zero. * @offset: The byte offset in the folio to start zeroing at. * @kaddr: The address the folio is currently mapped to. * * If you have already used kmap_local_folio() to map a folio, written * some data to it and now need to zero the end of the folio (and flush * the dcache), you can use this function. If you do not have the * folio kmapped (eg the folio has been partially populated by DMA), * use folio_zero_range() or folio_zero_segment() instead. * * Return: An address which can be passed to kunmap_local(). */ static inline __must_check void *folio_zero_tail(struct folio *folio, size_t offset, void *kaddr) { size_t len = folio_size(folio) - offset; if (folio_test_highmem(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { memset(kaddr, 0, max); kunmap_local(kaddr); len -= max; offset += max; max = PAGE_SIZE; kaddr = kmap_local_folio(folio, offset); } } memset(kaddr, 0, len); flush_dcache_folio(folio); return kaddr; } /** * folio_fill_tail - Copy some data to a folio and pad with zeroes. * @folio: The destination folio. * @offset: The offset into @folio at which to start copying. * @from: The data to copy. * @len: How many bytes of data to copy. * * This function is most useful for filesystems which support inline data. * When they want to copy data from the inode into the page cache, this * function does everything for them. It supports large folios even on * HIGHMEM configurations. */ static inline void folio_fill_tail(struct folio *folio, size_t offset, const char *from, size_t len) { char *to = kmap_local_folio(folio, offset); VM_BUG_ON(offset + len > folio_size(folio)); if (folio_test_highmem(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { memcpy(to, from, max); kunmap_local(to); len -= max; from += max; offset += max; max = PAGE_SIZE; to = kmap_local_folio(folio, offset); } } memcpy(to, from, len); to = folio_zero_tail(folio, offset + len, to + len); kunmap_local(to); } /** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. * @folio: The folio to copy from. * @pos: The position in the file. * @len: The maximum number of bytes to copy. * * Copy up to @len bytes from this folio. This may be limited by PAGE_SIZE * if the folio comes from HIGHMEM, and by the size of the folio. * * Return: The number of bytes copied from the folio. */ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, loff_t pos, size_t len) { size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); if (folio_test_highmem(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else len = min(len, folio_size(folio) - offset); memcpy(to, from, len); kunmap_local(from); return len; } /** * folio_zero_segments() - Zero two byte ranges in a folio. * @folio: The folio to write to. * @start1: The first byte to zero. * @xend1: One more than the last byte in the first range. 
 * @start2: The first byte to zero in the second range.
 * @xend2: One more than the last byte in the second range.
 */
static inline void folio_zero_segments(struct folio *folio,
		size_t start1, size_t xend1, size_t start2, size_t xend2)
{
	zero_user_segments(&folio->page, start1, xend1, start2, xend2);
}

/**
 * folio_zero_segment() - Zero a byte range in a folio.
 * @folio: The folio to write to.
 * @start: The first byte to zero.
 * @xend: One more than the last byte to zero.
 */
static inline void folio_zero_segment(struct folio *folio,
		size_t start, size_t xend)
{
	zero_user_segments(&folio->page, start, xend, 0, 0);
}

/**
 * folio_zero_range() - Zero a byte range in a folio.
 * @folio: The folio to write to.
 * @start: The first byte to zero.
 * @length: The number of bytes to zero.
 */
static inline void folio_zero_range(struct folio *folio,
		size_t start, size_t length)
{
	zero_user_segments(&folio->page, start, start + length, 0, 0);
}

/**
 * folio_release_kmap - Unmap a folio and drop a refcount.
 * @folio: The folio to release.
 * @addr: The address previously returned by a call to kmap_local_folio().
 *
 * It is common, eg in directory handling to kmap a folio. This function
 * unmaps the folio and drops the refcount that was being held to keep the
 * folio alive while we accessed it.
 */
static inline void folio_release_kmap(struct folio *folio, void *addr)
{
	kunmap_local(addr);
	folio_put(folio);
}

static inline void unmap_and_put_page(struct page *page, void *addr)
{
	folio_release_kmap(page_folio(page), addr);
}

#endif /* _LINUX_HIGHMEM_H */
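/*
 * Editor's note: an illustrative (non-upstream) sketch of the nesting rule
 * documented for kmap_local_page() above -- local mappings must be released
 * in the reverse order in which they were taken, and the pointers are only
 * valid in the mapping context. The function name and its caller are
 * hypothetical; only interfaces from linux/highmem.h are used.
 */
#include <linux/highmem.h>
#include <linux/string.h>

static void example_copy_page_tail(struct page *dst, struct page *src,
				   size_t offset)
{
	/* Map both pages; on !HIGHMEM this is just the direct mapping. */
	char *vdst = kmap_local_page(dst);
	char *vsrc = kmap_local_page(src);

	memcpy(vdst + offset, vsrc + offset, PAGE_SIZE - offset);

	/* Unmap strictly in reverse order of the map operations. */
	kunmap_local(vsrc);
	kunmap_local(vdst);

	flush_dcache_page(dst);
}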
/* AFS superblock handling
 *
 * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved.
 *
 * This software may be freely redistributed under the terms of the
 * GNU General Public License.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
* * Authors: David Howells <dhowells@redhat.com> * David Woodhouse <dwmw2@infradead.org> * */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/fs_parser.h> #include <linux/statfs.h> #include <linux/sched.h> #include <linux/nsproxy.h> #include <linux/magic.h> #include <net/net_namespace.h> #include "internal.h" static void afs_i_init_once(void *foo); static void afs_kill_super(struct super_block *sb); static struct inode *afs_alloc_inode(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); static void afs_free_inode(struct inode *inode); static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); static int afs_show_devname(struct seq_file *m, struct dentry *root); static int afs_show_options(struct seq_file *m, struct dentry *root); static int afs_init_fs_context(struct fs_context *fc); static const struct fs_parameter_spec afs_fs_parameters[]; struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .init_fs_context = afs_init_fs_context, .parameters = afs_fs_parameters, .kill_sb = afs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE, }; MODULE_ALIAS_FS("afs"); int afs_net_id; static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, .write_inode = netfs_unpin_writeback, .drop_inode = afs_drop_inode, .destroy_inode = afs_destroy_inode, .free_inode = afs_free_inode, .evict_inode = afs_evict_inode, .show_devname = afs_show_devname, .show_options = afs_show_options, }; static struct kmem_cache *afs_inode_cachep; static atomic_t afs_count_active_inodes; enum afs_param { Opt_autocell, Opt_dyn, Opt_flock, Opt_source, }; static const struct constant_table afs_param_flock[] = { {"local", afs_flock_mode_local }, {"openafs", afs_flock_mode_openafs }, {"strict", afs_flock_mode_strict }, {"write", afs_flock_mode_write }, {} }; static const struct fs_parameter_spec afs_fs_parameters[] = { fsparam_flag ("autocell", Opt_autocell), fsparam_flag ("dyn", Opt_dyn), fsparam_enum ("flock", Opt_flock, afs_param_flock), fsparam_string("source", Opt_source), {} }; /* * initialise the filesystem */ int __init afs_fs_init(void) { int ret; _enter(""); /* create ourselves an inode cache */ atomic_set(&afs_count_active_inodes, 0); ret = -ENOMEM; afs_inode_cachep = kmem_cache_create("afs_inode_cache", sizeof(struct afs_vnode), 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, afs_i_init_once); if (!afs_inode_cachep) { printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n"); return ret; } /* now export our filesystem to lesser mortals */ ret = register_filesystem(&afs_fs_type); if (ret < 0) { kmem_cache_destroy(afs_inode_cachep); _leave(" = %d", ret); return ret; } _leave(" = 0"); return 0; } /* * clean up the filesystem */ void afs_fs_exit(void) { _enter(""); afs_mntpt_kill_timer(); unregister_filesystem(&afs_fs_type); if (atomic_read(&afs_count_active_inodes) != 0) { printk("kAFS: %d active inode objects still present\n", atomic_read(&afs_count_active_inodes)); BUG(); } /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } /* * Display the mount device name in /proc/mounts. 
*/ static int afs_show_devname(struct seq_file *m, struct dentry *root) { struct afs_super_info *as = AFS_FS_S(root->d_sb); struct afs_volume *volume = as->volume; struct afs_cell *cell = as->cell; const char *suf = ""; char pref = '%'; if (as->dyn_root) { seq_puts(m, "none"); return 0; } switch (volume->type) { case AFSVL_RWVOL: break; case AFSVL_ROVOL: pref = '#'; if (volume->type_force) suf = ".readonly"; break; case AFSVL_BACKVOL: pref = '#'; suf = ".backup"; break; } seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); return 0; } /* * Display the mount options in /proc/mounts. */ static int afs_show_options(struct seq_file *m, struct dentry *root) { struct afs_super_info *as = AFS_FS_S(root->d_sb); const char *p = NULL; if (as->dyn_root) seq_puts(m, ",dyn"); if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) seq_puts(m, ",autocell"); switch (as->flock_mode) { case afs_flock_mode_unset: break; case afs_flock_mode_local: p = "local"; break; case afs_flock_mode_openafs: p = "openafs"; break; case afs_flock_mode_strict: p = "strict"; break; case afs_flock_mode_write: p = "write"; break; } if (p) seq_printf(m, ",flock=%s", p); return 0; } /* * Parse the source name to get cell name, volume name, volume type and R/W * selector. * * This can be one of the following: * "%[cell:]volume[.]" R/W volume * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), * or R/W (R/W parent) volume * "%[cell:]volume.readonly" R/O volume * "#[cell:]volume.readonly" R/O volume * "%[cell:]volume.backup" Backup volume * "#[cell:]volume.backup" Backup volume */ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) { struct afs_fs_context *ctx = fc->fs_private; struct afs_cell *cell; const char *cellname, *suffix, *name = param->string; int cellnamesz; _enter(",%s", name); if (fc->source) return invalf(fc, "kAFS: Multiple sources not supported"); if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; } if ((name[0] != '%' && name[0] != '#') || !name[1]) { /* To use dynroot, we don't want to have to provide a source */ if (strcmp(name, "none") == 0) { ctx->no_cell = true; return 0; } printk(KERN_ERR "kAFS: unparsable volume name\n"); return -EINVAL; } /* determine the type of volume we're looking for */ if (name[0] == '%') { ctx->type = AFSVL_RWVOL; ctx->force = true; } name++; /* split the cell name out if there is one */ ctx->volname = strchr(name, ':'); if (ctx->volname) { cellname = name; cellnamesz = ctx->volname - name; ctx->volname++; } else { ctx->volname = name; cellname = NULL; cellnamesz = 0; } /* the volume type is further affected by a possible suffix */ suffix = strrchr(ctx->volname, '.'); if (suffix) { if (strcmp(suffix, ".readonly") == 0) { ctx->type = AFSVL_ROVOL; ctx->force = true; } else if (strcmp(suffix, ".backup") == 0) { ctx->type = AFSVL_BACKVOL; ctx->force = true; } else if (suffix[1] == 0) { } else { suffix = NULL; } } ctx->volnamesz = suffix ? 
suffix - ctx->volname : strlen(ctx->volname); _debug("cell %*.*s [%p]", cellnamesz, cellnamesz, cellname ?: "", ctx->cell); /* lookup the cell record */ if (cellname) { cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, NULL, false); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse); afs_see_cell(cell, afs_cell_trace_see_source); ctx->cell = cell; } _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", ctx->cell->name, ctx->cell, ctx->volnamesz, ctx->volnamesz, ctx->volname, suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); fc->source = param->string; param->string = NULL; return 0; } /* * Parse a single mount parameter. */ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct afs_fs_context *ctx = fc->fs_private; int opt; opt = fs_parse(fc, afs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_source: return afs_parse_source(fc, param); case Opt_autocell: ctx->autocell = true; break; case Opt_dyn: ctx->dyn_root = true; break; case Opt_flock: ctx->flock_mode = result.uint_32; break; default: return -EINVAL; } _leave(" = 0"); return 0; } /* * Validate the options, get the cell key and look up the volume. */ static int afs_validate_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_volume *volume; struct afs_cell *cell; struct key *key; int ret; if (!ctx->dyn_root) { if (ctx->no_cell) { pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); return -EINVAL; } if (!ctx->cell) { pr_warn("kAFS: No cell specified\n"); return -EDESTADDRREQ; } reget_key: /* We try to do the mount securely. 
*/ key = afs_request_key(ctx->cell); if (IS_ERR(key)) return PTR_ERR(key); ctx->key = key; if (ctx->volume) { afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc); ctx->volume = NULL; } if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) { ret = afs_cell_detect_alias(ctx->cell, key); if (ret < 0) return ret; if (ret == 1) { _debug("switch to alias"); key_put(ctx->key); ctx->key = NULL; cell = afs_use_cell(ctx->cell->alias_of, afs_cell_trace_use_fc_alias); afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); ctx->cell = cell; goto reget_key; } } volume = afs_create_volume(ctx); if (IS_ERR(volume)) return PTR_ERR(volume); ctx->volume = volume; if (volume->type != AFSVL_RWVOL) { ctx->flock_mode = afs_flock_mode_local; fc->sb_flags |= SB_RDONLY; } } return 0; } /* * check a superblock to see if it's the one we're looking for */ static int afs_test_super(struct super_block *sb, struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as = AFS_FS_S(sb); return (as->net_ns == fc->net_ns && as->volume && as->volume->vid == ctx->volume->vid && as->cell == ctx->cell && !as->dyn_root); } static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) { struct afs_super_info *as = AFS_FS_S(sb); return (as->net_ns == fc->net_ns && as->dyn_root); } static int afs_set_super(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); } /* * fill in the superblock */ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info *as = AFS_FS_S(sb); struct inode *inode = NULL; int ret; _enter(""); /* fill in the superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; if (!as->dyn_root) sb->s_xattr = afs_xattr_handlers; ret = super_setup_bdi(sb); if (ret) return ret; /* allocate the root inode and dentry */ if (as->dyn_root) { inode = afs_iget_pseudo_dir(sb, true); } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); inode = afs_root_iget(sb, ctx->key); } if (IS_ERR(inode)) return PTR_ERR(inode); if (ctx->autocell || as->dyn_root) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; sb->s_root = d_make_root(inode); if (!sb->s_root) goto error; if (as->dyn_root) { sb->s_d_op = &afs_dynroot_dentry_operations; ret = afs_dynroot_populate(sb); if (ret < 0) goto error; } else { sb->s_d_op = &afs_fs_dentry_operations; rcu_assign_pointer(as->volume->sb, sb); } _leave(" = 0"); return 0; error: _leave(" = %d", ret); return ret; } static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as; as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { as->net_ns = get_net(fc->net_ns); as->flock_mode = ctx->flock_mode; if (ctx->dyn_root) { as->dyn_root = true; } else { as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi); as->volume = afs_get_volume(ctx->volume, afs_volume_trace_get_alloc_sbi); } } return as; } static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { struct afs_net *net = afs_net(as->net_ns); afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi); afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi); put_net(as->net_ns); kfree(as); } } static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); if (as->dyn_root) afs_dynroot_depopulate(sb); /* Clear the callback 
interests (which will do ilookup5) before * deactivating the superblock. */ if (as->volume) rcu_assign_pointer(as->volume->sb, NULL); kill_anon_super(sb); if (as->volume) afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } /* * Get an AFS superblock and root directory. */ static int afs_get_tree(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct super_block *sb; struct afs_super_info *as; int ret; ret = afs_validate_fc(fc); if (ret) goto error; _enter(""); /* allocate a superblock info record */ ret = -ENOMEM; as = afs_alloc_sbi(fc); if (!as) goto error; fc->s_fs_info = as; /* allocate a deviceless superblock */ sb = sget_fc(fc, as->dyn_root ? afs_dynroot_test_super : afs_test_super, afs_set_super); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto error; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); ret = afs_fill_super(sb, ctx); if (ret < 0) goto error_sb; sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, SB_ACTIVE); } fc->root = dget(sb->s_root); trace_afs_get_tree(as->cell, as->volume); _leave(" = 0 [%p]", sb); return 0; error_sb: deactivate_locked_super(sb); error: _leave(" = %d", ret); return ret; } static void afs_free_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; afs_destroy_sbi(fc->s_fs_info); afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc); afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); key_put(ctx->key); kfree(ctx); } static const struct fs_context_operations afs_context_ops = { .free = afs_free_fc, .parse_param = afs_parse_param, .get_tree = afs_get_tree, }; /* * Set up the filesystem mount context. */ static int afs_init_fs_context(struct fs_context *fc) { struct afs_fs_context *ctx; struct afs_cell *cell; ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->type = AFSVL_ROVOL; ctx->net = afs_net(fc->net_ns); /* Default to the workstation cell. */ cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc); if (IS_ERR(cell)) cell = NULL; ctx->cell = cell; fc->fs_private = ctx; fc->ops = &afs_context_ops; return 0; } /* * Initialise an inode cache slab element prior to any use. Note that * afs_alloc_inode() *must* reset anything that could incorrectly leak from one * inode to another. */ static void afs_i_init_once(void *_vnode) { struct afs_vnode *vnode = _vnode; memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->netfs.inode); mutex_init(&vnode->io_lock); init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); INIT_LIST_HEAD(&vnode->cb_mmap_link); seqlock_init(&vnode->cb_lock); } /* * allocate an AFS inode struct from our slab cache */ static struct inode *afs_alloc_inode(struct super_block *sb) { struct afs_vnode *vnode; vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL); if (!vnode) return NULL; atomic_inc(&afs_count_active_inodes); /* Reset anything that shouldn't leak from one inode to the next. 
*/ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); afs_vnode_set_cache(vnode, NULL); vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; vnode->flags = 1 << AFS_VNODE_UNSET; vnode->lock_state = AFS_VNODE_LOCK_NONE; init_rwsem(&vnode->rmdir_lock); INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work); _leave(" = %p", &vnode->netfs.inode); return &vnode->netfs.inode; } static void afs_free_inode(struct inode *inode) { kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode)); } /* * destroy an AFS inode struct */ static void afs_destroy_inode(struct inode *inode) { struct afs_vnode *vnode = AFS_FS_I(inode); _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode); _debug("DESTROY INODE %p", inode); atomic_dec(&afs_count_active_inodes); } static void afs_get_volume_status_success(struct afs_operation *op) { struct afs_volume_status *vs = &op->volstatus.vs; struct kstatfs *buf = op->volstatus.buf; if (vs->max_quota == 0) buf->f_blocks = vs->part_max_blocks; else buf->f_blocks = vs->max_quota; if (buf->f_blocks > vs->blocks_in_use) buf->f_bavail = buf->f_bfree = buf->f_blocks - vs->blocks_in_use; } static const struct afs_operation_ops afs_get_volume_status_operation = { .issue_afs_rpc = afs_fs_get_volume_status, .issue_yfs_rpc = yfs_fs_get_volume_status, .success = afs_get_volume_status_success, }; /* * return information about an AFS volume */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct afs_super_info *as = AFS_FS_S(dentry->d_sb); struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = AFS_BLOCK_SIZE; buf->f_namelen = AFSNAMEMAX - 1; if (as->dyn_root) { |